Importing Main Dataset


#Data File: CleanDNAprepData1.18.19
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(emmeans)
library(multcompView)

# Read the tab-delimited source table; the first row supplies the column names.
AllData <- read.table(
  file = "CleanDNAprepData1.18.19.txt",
  header = TRUE,
  sep = "\t",
  fill = TRUE
)
# Drop rows whose VariableSampleType is "Standard" or "NP40InoculatedMilk".
SampleData <- filter(
  AllData,
  VariableSampleType != "Standard",
  VariableSampleType != "NP40InoculatedMilk"
)
dim(SampleData)
## [1] 1440   42
# Save the filtered table as tab-delimited text.
write.table(SampleData, file = "SampleData.txt", sep = "\t")

#Getting All variables and all levels within each

#sapply(SampleData, levels)
#SampleData %>% 
#    sapply(levels)

# Summary statistics of LogCopiespermLofMilk for every
# Assay x VariableKit x VariableSampleType combination:
# mean and standard deviation (ignoring NA), number of NA values, row count.
# TRUE is spelled out because T is an ordinary, reassignable variable.
SampleData.summary <- SampleData %>%
  group_by(Assay, VariableKit, VariableSampleType) %>%
  summarize(
    mean_CopyN_permLMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  )
## `summarise()` regrouping output by 'Assay', 'VariableKit' (override with `.groups` argument)
write.table(SampleData.summary, "SampleData.summary.txt", sep = "\t")

# Same statistics as above, grouped by Assay x SpikeSet x VariableKit and
# converted to a plain data frame before being written out.
# TRUE is spelled out because T is an ordinary, reassignable variable.
SampleData.summary.by.replicate <- SampleData %>%
  group_by(Assay, SpikeSet, VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'Assay', 'SpikeSet' (override with `.groups` argument)
write.table(
  SampleData.summary.by.replicate,
  "SampleData.summary.by.replicate.txt",
  sep = "\t"
)

Bovine


Total Bovine Copy Numbers

# Keep only the rows of SampleData whose Assay is "Bovine DNA".
Bovine <- SampleData %>%
  filter(Assay == "Bovine DNA")
dim(Bovine)
## [1] 240  42
# Per VariableKit x VariableSampleType: mean and standard deviation of
# LogCopiespermLofMilk (ignoring NA), number of NA values, and row count.
# TRUE is spelled out because T is an ordinary, reassignable variable.
Bovine.summary <- Bovine %>%
  group_by(VariableKit, VariableSampleType) %>%
  summarize(
    mean_CopyN_permLMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  )
## `summarise()` regrouping output by 'VariableKit' (override with `.groups` argument)
write.table(Bovine.summary, "Bovine.summary.txt", sep = "\t")
# Box plots of the raw values: one box per kit within each sample type.
# Colors is the fixed kit -> colour lookup passed to scale_color_manual().
# NOTE(review): "Pfood" is "#D2338B" here but "#D2326B" in the later
# redefinitions of Colors in this file -- confirm which value is intended.
Colors <- c(
  "COREDNA" = "#4DB3C7",
  "EZFood" = "#85CA46",
  "Mastitis" = "#F49D00",
  "Pfood" = "#D2338B",
  "PSoilP" = "#1D6E9B",
  "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# NOTE(review): `z` is not listed among geom_boxplot's aesthetics; it is
# presumably a leftover -- confirm before removing it.
ggplot(
  data = Bovine,
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    z = VariableKit,
    color = VariableKit
  )
) +
  geom_boxplot(lwd = 1) +
  scale_color_manual(values = Colors) +
  # NOTE(review): "NP40InoculatedMilk" rows are removed when SampleData is
  # built, so this axis position is presumably always empty -- confirm.
  scale_x_discrete(
    limits = c(
      "UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk",
      "MockCommunity", "NoTemplateControl"
    )
  ) +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Bovine DNA Copy Numbers") +
  theme_bw() +
  # Strip grid, background and border; keep plain axis lines and tilt x labels.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 80 rows containing non-finite values (stat_boxplot).

# Jittered raw observations by sample type: colour encodes the kit,
# point shape encodes the SpikeSet.
ggplot(
  data = Bovine,
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    color = VariableKit,
    shape = SpikeSet
  )
) +
  geom_jitter(width = 0.25) +
  scale_color_manual(values = Colors) +
  # NOTE(review): "NP40InoculatedMilk" rows are removed when SampleData is
  # built, so this axis position is presumably always empty -- confirm.
  scale_x_discrete(
    limits = c(
      "UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk",
      "MockCommunity", "NoTemplateControl"
    )
  ) +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Bovine DNA Copy Numbers") +
  theme_bw() +
  # Strip grid, background and border; keep plain axis lines and tilt x labels.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 80 rows containing missing values (geom_point).

# Inoculated milk only: keep "InoculatedMilk" rows whose LogCopiespermLofMilk
# is greater than 0.001 (filter() also drops rows where the value is NA).
Bovine.InnOnly <- Bovine %>%
  filter(
    VariableSampleType == "InoculatedMilk",
    LogCopiespermLofMilk > 0.001
  )
dim(Bovine.InnOnly)
## [1] 120  42
# Print per-kit mean, standard deviation, NA count and row count.
# TRUE is spelled out because T is an ordinary, reassignable variable.
Bovine.InnOnly %>%
  group_by(VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` ungrouping output (override with `.groups` argument)
##   VariableKit mean_LogCopiespermLofMilk     st_dev n_missing n_total
## 1     COREDNA                  5.447426 0.08176974         0      18
## 2      EZFood                  4.707684 0.59361196         0      12
## 3    Mastitis                  5.365196 0.05349710         0      18
## 4       Pfood                  4.863607 0.07949570         0      18
## 5      PSoilP                  4.050068 0.23971891         0      18
## 6   PviralDNA                  5.199113 0.10942986         0      18
## 7     ZymoDNA                  4.810397 0.06312697         0      18
####
# Store the per-kit summary of the inoculated-milk subset and write both the
# summary and the subset itself to tab-delimited files.
# Uses `<-` like the rest of this file, and TRUE instead of the reassignable T.
Bovine.InnOnly.Summary <- Bovine.InnOnly %>%
  group_by(VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` ungrouping output (override with `.groups` argument)
write.table(Bovine.InnOnly.Summary, "BovineInnOnly.summary.txt", sep = "\t")

write.table(Bovine.InnOnly, "BovineInnOnly.txt", sep = "\t")
 

# Print the same summary broken down by SpikeSet x VariableKit.
# TRUE is spelled out because T is an ordinary, reassignable variable.
Bovine.InnOnly %>%
  group_by(SpikeSet, VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'SpikeSet' (override with `.groups` argument)
##    SpikeSet VariableKit mean_LogCopiespermLofMilk     st_dev n_missing n_total
## 1     First     COREDNA                  5.510743 0.05437988         0       6
## 2     First      EZFood                  5.023606 0.07381508         0       6
## 3     First    Mastitis                  5.337045 0.05887863         0       6
## 4     First       Pfood                  4.875976 0.05016906         0       6
## 5     First      PSoilP                  3.832946 0.11231042         0       6
## 6     First   PviralDNA                  5.226245 0.05143839         0       6
## 7     First     ZymoDNA                  4.830652 0.05446231         0       6
## 8    Second     COREDNA                  5.396342 0.07460889         0       6
## 9    Second    Mastitis                  5.394293 0.02064839         0       6
## 10   Second       Pfood                  4.775180 0.03994283         0       6
## 11   Second      PSoilP                  4.084926 0.19020018         0       6
## 12   Second   PviralDNA                  5.085082 0.08519742         0       6
## 13   Second     ZymoDNA                  4.785882 0.03169048         0       6
## 14    Third     COREDNA                  5.435192 0.07802399         0       6
## 15    Third      EZFood                  4.391762 0.72817640         0       6
## 16    Third    Mastitis                  5.364249 0.06220550         0       6
## 17    Third       Pfood                  4.939664 0.02942455         0       6
## 18    Third      PSoilP                  4.232332 0.22067912         0       6
## 19    Third   PviralDNA                  5.286011 0.07253691         0       6
## 20    Third     ZymoDNA                  4.814657 0.09134190         0       6
# Jittered raw observations per kit for the inoculated-milk subset:
# colour encodes the kit, point shape encodes the SpikeSet.
ggplot(
  data = Bovine.InnOnly,
  mapping = aes(
    x = VariableKit,
    y = LogCopiespermLofMilk,
    color = VariableKit,
    shape = SpikeSet
  )
) +
  geom_jitter(width = 0.25) +
  scale_color_manual(values = Colors) +
  # Fixed left-to-right kit order on the x axis.
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood",
      "PSoilP", "PviralDNA", "ZymoDNA"
    )
  ) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Bovine DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  # Strip grid, background and border; keep plain axis lines and tilt x labels.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )

# Variant of the per-kit jitter plot using hollow point shapes
# (solid = FALSE) with a wider jitter and a heavier outline.
# FALSE is spelled out because F is an ordinary, reassignable variable.
ggplot(
  Bovine.InnOnly,
  aes(VariableKit, LogCopiespermLofMilk, shape = factor(SpikeSet))
) +
  scale_shape_discrete(solid = FALSE) +
  geom_jitter(aes(colour = VariableKit), size = 2, stroke = 1, width = .5) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Bovine DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  scale_color_manual(values = Colors) +
  # Strip grid, background and border; keep plain axis lines and tilt x labels.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  # Fixed left-to-right kit order on the x axis.
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood",
      "PSoilP", "PviralDNA", "ZymoDNA"
    )
  )

# Model Selection

# Three linear models were compared, each with VariableKit as a predictor plus one of: SpikeSet only, qPCRefficiency only, or both as covariates. The best-fitting model was used as the final model.

# Candidate model 1: kit effect with SpikeSet as the only covariate.
m_Bovine.LogCopiespermLofMilk1 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = Bovine.InnOnly
)
summary(m_Bovine.LogCopiespermLofMilk1)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Bovine.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.13707 -0.06085  0.00067  0.06704  0.54510 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           5.47765    0.05874  93.255  < 2e-16 ***
## VariableKitEZFood    -0.75758    0.08288  -9.141 3.39e-15 ***
## VariableKitMastitis  -0.08223    0.07292  -1.128 0.261910    
## VariableKitPfood     -0.58382    0.07292  -8.006 1.27e-12 ***
## VariableKitPSoilP    -1.39736    0.07292 -19.162  < 2e-16 ***
## VariableKitPviralDNA -0.24831    0.07292  -3.405 0.000921 ***
## VariableKitZymoDNA   -0.63703    0.07292  -8.736 2.86e-14 ***
## SpikeSetSecond       -0.06591    0.05064  -1.302 0.195747    
## SpikeSetThird        -0.02476    0.04774  -0.519 0.604983    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2188 on 111 degrees of freedom
## Multiple R-squared:  0.8217, Adjusted R-squared:  0.8088 
## F-statistic: 63.94 on 8 and 111 DF,  p-value: < 2.2e-16
# Candidate model 2: kit effect with qPCRefficiency as the only covariate.
m_Bovine.LogCopiespermLofMilk2 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
  data = Bovine.InnOnly
)
summary(m_Bovine.LogCopiespermLofMilk2)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency, 
##     data = Bovine.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.14589 -0.06992  0.00801  0.06122  0.53629 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           5.32738    0.82569   6.452 2.91e-09 ***
## VariableKitEZFood    -0.73978    0.08178  -9.046 5.24e-15 ***
## VariableKitMastitis  -0.08275    0.07323  -1.130 0.260914    
## VariableKitPfood     -0.58434    0.07323  -7.979 1.39e-12 ***
## VariableKitPSoilP    -1.39788    0.07323 -19.088  < 2e-16 ***
## VariableKitPviralDNA -0.24831    0.07315  -3.395 0.000951 ***
## VariableKitZymoDNA   -0.63755    0.07323  -8.706 3.15e-14 ***
## qPCRefficiency        0.13332    0.91514   0.146 0.884437    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2194 on 112 degrees of freedom
## Multiple R-squared:  0.819,  Adjusted R-squared:  0.8077 
## F-statistic: 72.39 on 7 and 112 DF,  p-value: < 2.2e-16
# Candidate model 3: kit effect with both qPCRefficiency and SpikeSet.
m_Bovine.LogCopiespermLofMilk3 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Bovine.InnOnly
)
summary(m_Bovine.LogCopiespermLofMilk3)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = Bovine.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.14165 -0.06039  0.00318  0.06822  0.54053 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           8.00863    7.32053   1.094 0.276347    
## VariableKitEZFood    -0.74512    0.09068  -8.217 4.48e-13 ***
## VariableKitMastitis  -0.07155    0.07946  -0.900 0.369853    
## VariableKitPfood     -0.57314    0.07946  -7.213 7.39e-11 ***
## VariableKitPSoilP    -1.38668    0.07946 -17.451  < 2e-16 ***
## VariableKitPviralDNA -0.24831    0.07321  -3.392 0.000966 ***
## VariableKitZymoDNA   -0.62635    0.07946  -7.882 2.51e-12 ***
## qPCRefficiency       -2.74709    7.94534  -0.346 0.730192    
## SpikeSetSecond       -0.09991    0.11070  -0.903 0.368756    
## SpikeSetThird        -0.16258    0.40148  -0.405 0.686296    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2196 on 110 degrees of freedom
## Multiple R-squared:  0.8219, Adjusted R-squared:  0.8073 
## F-statistic:  56.4 on 9 and 110 DF,  p-value: < 2.2e-16
# F-test comparison of Model 1 (SpikeSet) against Model 2 (qPCRefficiency).
# NOTE(review): these two models are not nested (neither formula is a subset
# of the other), so this F-test should be read with caution; the AIC values
# computed further down are the more usual comparison for this pair.
anova(m_Bovine.LogCopiespermLofMilk1, m_Bovine.LogCopiespermLofMilk2)
## Analysis of Variance Table
## 
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    111 5.3125                           
## 2    112 5.3931 -1 -0.080615 1.6844  0.197
# Model with qPCRefficiency does not have better fit than model with SpikeSet

# F-test of Model 2 against Model 3, i.e. whether adding SpikeSet to the
# qPCRefficiency model improves the fit.
anova(m_Bovine.LogCopiespermLofMilk2, m_Bovine.LogCopiespermLofMilk3)
## Analysis of Variance Table
## 
## Model 1: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    112 5.3931                           
## 2    110 5.3067  2  0.086382 0.8953 0.4114
# Model with both qPCRefficiency and SpikeSet does not have better fit than model with qPCRefficiency only

# F-test of Model 1 against Model 3, i.e. whether adding qPCRefficiency to
# the SpikeSet model improves the fit.
anova(m_Bovine.LogCopiespermLofMilk1, m_Bovine.LogCopiespermLofMilk3)
## Analysis of Variance Table
## 
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    111 5.3125                           
## 2    110 5.3067  1  0.005767 0.1195 0.7302
# Model with both qPCRefficiency and SpikeSet does not have better fit than model with SpikeSet only

# Fit of model with both qPCRefficiency and SpikeSet is not different from fit of model with SpikeSet only
# All three models in one table; each row is compared with the row above it
# (Model 2 vs Model 1, then Model 3 vs Model 2).
# NOTE(review): the Model 1 -> Model 2 step compares non-nested models, so
# that row's F-test should be read with caution.
anova(m_Bovine.LogCopiespermLofMilk1, m_Bovine.LogCopiespermLofMilk2, m_Bovine.LogCopiespermLofMilk3)
## Analysis of Variance Table
## 
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    111 5.3125                           
## 2    112 5.3931 -1 -0.080615 1.6710 0.1988
## 3    110 5.3067  2  0.086382 0.8953 0.4114
# Akaike information criterion for each candidate model, printed one at a
# time (lower values indicate the preferred model). The commented-out lines
# repeat each model's definition for reference only.
#m_Bovine.LogCopiespermLofMilk1 <- lm( LogCopiespermLofMilk ~ VariableKit + SpikeSet, data=Bovine.InnOnly )
AIC (m_Bovine.LogCopiespermLofMilk1)
## [1] -13.54697
#m_Bovine.LogCopiespermLofMilk2 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency, data=Bovine.InnOnly )
AIC (m_Bovine.LogCopiespermLofMilk2)
## [1] -13.73969
#m_Bovine.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Bovine.InnOnly )
AIC (m_Bovine.LogCopiespermLofMilk3)
## [1] -11.67731
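# Final model chosen:
# Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
# (AIC for Models 1 and 2 is nearly identical, -13.55 vs -13.74, and adding qPCRefficiency to Model 1 did not improve the fit, p = 0.73.)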

Chosen Linear Model

# Refit the selected specification (kit effect plus SpikeSet) under the name
# used by the diagnostics and marginal-means code that follows.
m_Bovine.LogCopiespermLofMilk <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = Bovine.InnOnly
)
summary(m_Bovine.LogCopiespermLofMilk)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Bovine.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.13707 -0.06085  0.00067  0.06704  0.54510 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           5.47765    0.05874  93.255  < 2e-16 ***
## VariableKitEZFood    -0.75758    0.08288  -9.141 3.39e-15 ***
## VariableKitMastitis  -0.08223    0.07292  -1.128 0.261910    
## VariableKitPfood     -0.58382    0.07292  -8.006 1.27e-12 ***
## VariableKitPSoilP    -1.39736    0.07292 -19.162  < 2e-16 ***
## VariableKitPviralDNA -0.24831    0.07292  -3.405 0.000921 ***
## VariableKitZymoDNA   -0.63703    0.07292  -8.736 2.86e-14 ***
## SpikeSetSecond       -0.06591    0.05064  -1.302 0.195747    
## SpikeSetThird        -0.02476    0.04774  -0.519 0.604983    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2188 on 111 degrees of freedom
## Multiple R-squared:  0.8217, Adjusted R-squared:  0.8088 
## F-statistic: 63.94 on 8 and 111 DF,  p-value: < 2.2e-16
# Model Fit Plots

# Base-graphics scatter of residuals (y) against predicted values (x) for
# the chosen model.
plot(x=predict(m_Bovine.LogCopiespermLofMilk),y=resid(m_Bovine.LogCopiespermLofMilk))

  # using ggplot2
# Kit -> colour lookup passed to scale_color_manual() in the plot below.
Colors <- c(
  "COREDNA" = "#4DB3C7",
  "EZFood" = "#85CA46",
  "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B",
  "PSoilP" = "#1D6E9B",
  "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

# Residuals vs predicted values for the chosen model, coloured by kit, with
# horizontal reference lines at +1 and -1.
# The title now spells "Inoculated" the same way as the other figure titles
# in this file (it previously read "Innoculated").
ggplot(
  m_Bovine.LogCopiespermLofMilk,
  aes(
    x = predict(m_Bovine.LogCopiespermLofMilk),
    y = resid(m_Bovine.LogCopiespermLofMilk),
    color = VariableKit
  )
) +
  geom_point() +
  theme_bw() +
  ggtitle("Bovine Inoculated Only - Model Fit - Residuals vs Predicted") +
  scale_color_manual(values = Colors) +
  # Strip grid, background and border; keep plain axis lines.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  geom_hline(yintercept = 1) +
  geom_hline(yintercept = -1)

  # qqplots
# Normal Q-Q plot of the model residuals, with its reference line added.
qqnorm(resid(m_Bovine.LogCopiespermLofMilk))
qqline(resid(m_Bovine.LogCopiespermLofMilk))

# Print the summary of the chosen model again.
summary(m_Bovine.LogCopiespermLofMilk)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Bovine.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.13707 -0.06085  0.00067  0.06704  0.54510 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           5.47765    0.05874  93.255  < 2e-16 ***
## VariableKitEZFood    -0.75758    0.08288  -9.141 3.39e-15 ***
## VariableKitMastitis  -0.08223    0.07292  -1.128 0.261910    
## VariableKitPfood     -0.58382    0.07292  -8.006 1.27e-12 ***
## VariableKitPSoilP    -1.39736    0.07292 -19.162  < 2e-16 ***
## VariableKitPviralDNA -0.24831    0.07292  -3.405 0.000921 ***
## VariableKitZymoDNA   -0.63703    0.07292  -8.736 2.86e-14 ***
## SpikeSetSecond       -0.06591    0.05064  -1.302 0.195747    
## SpikeSetThird        -0.02476    0.04774  -0.519 0.604983    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2188 on 111 degrees of freedom
## Multiple R-squared:  0.8217, Adjusted R-squared:  0.8088 
## F-statistic: 63.94 on 8 and 111 DF,  p-value: < 2.2e-16
# Almost all residuals have an absolute value < 1.
# The exceptions, counted per kit, are:

# Attach each observation's model residual as a new `resid` column, then
# count, per kit, the observations whose residual exceeds 1 in absolute value.
Bovine.InnOnly[["resid"]] <- residuals(m_Bovine.LogCopiespermLofMilk)
Bovine.InnOnly %>%
  filter(abs(resid) > 1) %>%
  select(VariableKit, resid) %>%
  group_by(VariableKit) %>%
  summarize(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 1 x 2
##   VariableKit     n
##   <chr>       <int>
## 1 EZFood          2
library(emmeans)
# Estimated marginal means per kit together with all pairwise contrasts
# between kits (requested by the `pairwise ~ VariableKit` specification).
# Check Tukey-adjusted pairwise comparison of kit estimates
m_Bovine.LogCopiespermLofMilk_emmeans <- emmeans(m_Bovine.LogCopiespermLofMilk,pairwise~VariableKit)
# Use compact letter display for convenience
# NOTE(review): CLD() is reported as deprecated by emmeans (see the warning
# printed below), which suggests 'pwpp' or 'multcomp::cld' instead; this call
# will presumably stop working in newer emmeans releases -- confirm and migrate.
m_Bovine.LogCopiespermLofMilk_cld <- CLD(m_Bovine.LogCopiespermLofMilk_emmeans$emmeans,
                         Letters=LETTERS)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Print the letter display; kits sharing a letter in .group are not
# significantly different at the alpha shown in the output.
m_Bovine.LogCopiespermLofMilk_cld
##  VariableKit emmean     SE  df lower.CL upper.CL .group
##  PSoilP        4.05 0.0516 111     3.95     4.15  A    
##  EZFood        4.69 0.0649 111     4.56     4.82   B   
##  ZymoDNA       4.81 0.0516 111     4.71     4.91   B   
##  Pfood         4.86 0.0516 111     4.76     4.97   B   
##  PviralDNA     5.20 0.0516 111     5.10     5.30    C  
##  Mastitis      5.37 0.0516 111     5.26     5.47    CD 
##  COREDNA       5.45 0.0516 111     5.35     5.55     D 
## 
## Results are averaged over the levels of: SpikeSet 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05
# Same letter display with details=TRUE, which additionally returns the table
# of pairwise comparisons (printed below as $comparisons).
# NOTE(review): CLD() is reported as deprecated by emmeans (see the warning
# printed below), which suggests 'pwpp' or 'multcomp::cld' instead.
m_Bovine.LogCopiespermLofMilk_cld_detail <- CLD(m_Bovine.LogCopiespermLofMilk_emmeans$emmeans, sort=TRUE, details=TRUE, Letters=LETTERS)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
m_Bovine.LogCopiespermLofMilk_cld_detail
## $emmeans
##  VariableKit emmean     SE  df lower.CL upper.CL .group
##  PSoilP        4.05 0.0516 111     3.95     4.15  A    
##  EZFood        4.69 0.0649 111     4.56     4.82   B   
##  ZymoDNA       4.81 0.0516 111     4.71     4.91   B   
##  Pfood         4.86 0.0516 111     4.76     4.97   B   
##  PviralDNA     5.20 0.0516 111     5.10     5.30    C  
##  Mastitis      5.37 0.0516 111     5.26     5.47    CD 
##  COREDNA       5.45 0.0516 111     5.35     5.55     D 
## 
## Results are averaged over the levels of: SpikeSet 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05 
## 
## $comparisons
##  contrast             estimate     SE  df t.ratio p.value
##  EZFood - PSoilP        0.6398 0.0829 111  7.719  <.0001 
##  ZymoDNA - PSoilP       0.7603 0.0729 111 10.426  <.0001 
##  ZymoDNA - EZFood       0.1206 0.0829 111  1.455  0.7707 
##  Pfood - PSoilP         0.8135 0.0729 111 11.156  <.0001 
##  Pfood - EZFood         0.1738 0.0829 111  2.097  0.3620 
##  Pfood - ZymoDNA        0.0532 0.0729 111  0.730  0.9905 
##  PviralDNA - PSoilP     1.1490 0.0729 111 15.757  <.0001 
##  PviralDNA - EZFood     0.5093 0.0829 111  6.145  <.0001 
##  PviralDNA - ZymoDNA    0.3887 0.0729 111  5.330  <.0001 
##  PviralDNA - Pfood      0.3355 0.0729 111  4.601  0.0002 
##  Mastitis - PSoilP      1.3151 0.0729 111 18.034  <.0001 
##  Mastitis - EZFood      0.6754 0.0829 111  8.149  <.0001 
##  Mastitis - ZymoDNA     0.5548 0.0729 111  7.608  <.0001 
##  Mastitis - Pfood       0.5016 0.0729 111  6.878  <.0001 
##  Mastitis - PviralDNA   0.1661 0.0729 111  2.278  0.2643 
##  COREDNA - PSoilP       1.3974 0.0729 111 19.162  <.0001 
##  COREDNA - EZFood       0.7576 0.0829 111  9.141  <.0001 
##  COREDNA - ZymoDNA      0.6370 0.0729 111  8.736  <.0001 
##  COREDNA - Pfood        0.5838 0.0729 111  8.006  <.0001 
##  COREDNA - PviralDNA    0.2483 0.0729 111  3.405  0.0156 
##  COREDNA - Mastitis     0.0822 0.0729 111  1.128  0.9183 
## 
## Results are averaged over the levels of: SpikeSet 
## P value adjustment: tukey method for comparing a family of 7 estimates
# Estimated marginal mean per kit as a plain data frame, so the values can be
# plotted with other software.
data.frame(
  summary(
    emmeans(m_Bovine.LogCopiespermLofMilk, ~VariableKit)
  )
)
##   VariableKit   emmean         SE  df lower.CL upper.CL
## 1     COREDNA 5.447426 0.05156446 111 5.345247 5.549604
## 2      EZFood 4.689842 0.06488385 111 4.561271 4.818414
## 3    Mastitis 5.365196 0.05156446 111 5.263017 5.467374
## 4       Pfood 4.863607 0.05156446 111 4.761428 4.965785
## 5      PSoilP 4.050068 0.05156446 111 3.947890 4.152247
## 6   PviralDNA 5.199113 0.05156446 111 5.096934 5.301291
## 7     ZymoDNA 4.810397 0.05156446 111 4.708218 4.912575
# Marginal means per kit with confidence limits plus t ratios and p-values
# (infer = TRUE).
emmeans(m_Bovine.LogCopiespermLofMilk, ~VariableKit) %>%
  summary(infer = TRUE)
##  VariableKit emmean     SE  df lower.CL upper.CL t.ratio p.value
##  COREDNA       5.45 0.0516 111     5.35     5.55 105.643 <.0001 
##  EZFood        4.69 0.0649 111     4.56     4.82  72.281 <.0001 
##  Mastitis      5.37 0.0516 111     5.26     5.47 104.048 <.0001 
##  Pfood         4.86 0.0516 111     4.76     4.97  94.321 <.0001 
##  PSoilP        4.05 0.0516 111     3.95     4.15  78.544 <.0001 
##  PviralDNA     5.20 0.0516 111     5.10     5.30 100.827 <.0001 
##  ZymoDNA       4.81 0.0516 111     4.71     4.91  93.289 <.0001 
## 
## Results are averaged over the levels of: SpikeSet 
## Confidence level used: 0.95
# Plot the estimated marginal mean of each kit with its confidence interval
# and the compact-letter-display group label next to each point.
Colors <- c(
  "COREDNA" = "#4DB3C7",
  "EZFood" = "#85CA46",
  "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B",
  "PSoilP" = "#1D6E9B",
  "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
ggplot(
  data = data.frame(
    summary(emmeans(m_Bovine.LogCopiespermLofMilk, ~VariableKit))
  ),
  mapping = aes(x = VariableKit, y = emmean, color = VariableKit)
) +
  geom_point() +
  labs(y = "Estimated Marginal Means") +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.5) +
  # Group letters come from the stored letter display; y and colour are
  # inherited from the plot-level mapping.
  geom_text(
    data = data.frame(m_Bovine.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`),
    hjust = -.1
  ) +
  ylim(3.5, 8.5) +
  theme_bw() +
  ggtitle("Bovine Copy Numbers - Inoculated Milk Only") +
  scale_color_manual(values = Colors) +
  # Strip grid, background and border; keep plain axis lines and tilt x labels.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  # Fixed left-to-right kit order on the x axis.
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood",
      "PSoilP", "PviralDNA", "ZymoDNA"
    )
  )

### Figure: Bovine InnOnly Raw Data + Final Model Output

# Raw observations for the combined figure, restricted to the three columns
# the plots below read from it. Columns are selected by name rather than by
# position (previously c(25, 42, 4)) so the subset cannot silently change
# when columns are added to or reordered in Bovine.InnOnly (a `resid` column
# is already appended to it earlier in this script).
# NOTE(review): the column order produced by the positional version cannot be
# recovered from this file; confirm nothing downstream indexes
# df1_Bovine.rawdata by position.
df1_Bovine.rawdata <- Bovine.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]

# Model output for the same figure: estimated marginal mean and confidence
# limits per kit, as a plain data frame.
df2_Bovine.model <- emmeans(m_Bovine.LogCopiespermLofMilk, ~VariableKit) %>%
  summary() %>%
  data.frame()

# Unstyled draft of the combined figure: jittered raw values, with the
# model's marginal means and confidence intervals drawn on top.
ggplot() +
  geom_jitter(
    data = df1_Bovine.rawdata,
    mapping = aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = df2_Bovine.model,
    mapping = aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = df2_Bovine.model,
    mapping = aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )

# Making the plot pretty

# Kit -> colour lookup passed to scale_color_manual() in the figure below.
Colors <- c(
  "COREDNA" = "#4DB3C7",
  "EZFood" = "#85CA46",
  "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B",
  "PSoilP" = "#1D6E9B",
  "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

# Styled combined figure: jittered raw values (colour = kit, shape =
# SpikeSet), marginal means with confidence intervals, and the
# compact-letter-display group label beside each mean.
# The colour scale is added only once; it used to be added twice, which made
# ggplot2 report "Scale for 'colour' is already present" and replace the
# first copy with the identical second one.
ggplot() +
  geom_jitter(
    data = df1_Bovine.rawdata,
    aes(
      x = VariableKit,
      y = LogCopiespermLofMilk,
      color = VariableKit,
      shape = SpikeSet
    )
  ) +
  geom_point(
    data = df2_Bovine.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = df2_Bovine.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  ) +
  geom_text(
    data = data.frame(m_Bovine.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`, y = emmean),
    hjust = -.5
  ) +
  ylim(3.5, 6.5) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  theme_bw() +
  ggtitle("Bovine Copy Numbers - Inoculated Milk Only") +
  scale_color_manual(values = Colors) +
  # Strip grid, background and border; keep plain axis lines and tilt x labels.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  # Fixed left-to-right kit order on the x axis.
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood",
      "PSoilP", "PviralDNA", "ZymoDNA"
    )
  )

Bovine - Models not assuming homoscedasticity

# from https://cran.r-project.org/web/packages/emmeans/vignettes/FAQs.html#contents

library(nlme)
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
# lm chosen: m_Bovine.LogCopiespermLofMilk <- lm( LogCopiespermLofMilk ~ VariableKit + SpikeSet, data=Bovine.InnOnly )

# Generalized least squares fit of the chosen specification that allows a
# separate residual standard deviation for each kit
# (varIdent stratified by VariableKit).
# Assigned with `<-` for consistency with the rest of this file.
mod.Bovine <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = Bovine.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod.Bovine)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit + SpikeSet 
##   Data: Bovine.InnOnly 
##         AIC       BIC   logLik
##   -128.8078 -85.45536 80.40392
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##   COREDNA    EZFood  Mastitis     Pfood    PSoilP PviralDNA   ZymoDNA 
## 1.0000000 7.6154180 0.9937919 0.6200963 3.0409418 1.0280090 0.8533467 
## 
## Coefficients:
##                          Value  Std.Error   t-value p-value
## (Intercept)           5.462007 0.02108712 259.02104  0.0000
## VariableKitEZFood    -0.767394 0.17466643  -4.39348  0.0000
## VariableKitMastitis  -0.082230 0.02624097  -3.13365  0.0022
## VariableKitPfood     -0.583819 0.02190093 -26.65728  0.0000
## VariableKitPSoilP    -1.397357 0.05958244 -23.45251  0.0000
## VariableKitPviralDNA -0.248313 0.02669373  -9.30230  0.0000
## VariableKitZymoDNA   -0.637029 0.02446865 -26.03449  0.0000
## SpikeSetSecond       -0.069885 0.01717688  -4.06858  0.0001
## SpikeSetThird         0.026141 0.01716113   1.52328  0.1305
## 
##  Correlation: 
##                      (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA SpkStS
## VariableKitEZFood    -0.101                                                 
## VariableKitMastitis  -0.626  0.076                                          
## VariableKitPfood     -0.750  0.091  0.603                                   
## VariableKitPSoilP    -0.276  0.033  0.222  0.265                            
## VariableKitPviralDNA -0.615  0.074  0.495  0.593  0.218                     
## VariableKitZymoDNA   -0.671  0.081  0.540  0.646  0.238  0.530              
## SpikeSetSecond       -0.407  0.025  0.000  0.000  0.000  0.000  0.000       
## SpikeSetThird        -0.407  0.000  0.000  0.000  0.000  0.000  0.000  0.500
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.47091970 -0.57358512  0.01701375  0.77528943  1.49714889 
## 
## Residual standard error: 0.07896765 
## Degrees of freedom: 120 total; 111 residual
# AIC of the heteroscedastic gls fit.
# NOTE(review): the summary above reports this model as fitted by REML;
# REML-based AIC values are generally not comparable between models with
# different fixed effects -- consider refitting with method = "ML" before
# comparing against the other gls models.
AIC(mod.Bovine)
## [1] -128.8078
# Model forcing qPCRefficiency


# Same heteroscedastic gls model with qPCRefficiency forced in as an
# additional covariate.
# Assigned with `<-` for consistency with the rest of this file.
# NOTE(review): the correlation matrix printed by summary() below shows
# qPCRefficiency almost perfectly correlated with the intercept (-1.000) and
# strongly with the SpikeSet terms (0.897, 0.993), so its coefficient is
# presumably poorly identified -- interpret with care.
mod.Bovine.all <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet + qPCRefficiency,
  data = Bovine.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod.Bovine.all)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit + SpikeSet + qPCRefficiency 
##   Data: Bovine.InnOnly 
##        AIC       BIC logLik
##   -134.968 -89.05984 84.484
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##   COREDNA    EZFood  Mastitis     Pfood    PSoilP PviralDNA   ZymoDNA 
## 1.0000000 8.5338537 1.0207949 0.7350816 3.2883480 1.1966519 0.9487773 
## 
## Coefficients:
##                          Value Std.Error    t-value p-value
## (Intercept)          11.116127 2.5679470   4.328799  0.0000
## VariableKitEZFood    -0.737765 0.1763197  -4.184246  0.0001
## VariableKitMastitis  -0.058375 0.0262665  -2.222422  0.0283
## VariableKitPfood     -0.559964 0.0234365 -23.892862  0.0000
## VariableKitPSoilP    -1.373503 0.0585627 -23.453530  0.0000
## VariableKitPviralDNA -0.248313 0.0261128  -9.509231  0.0000
## VariableKitZymoDNA   -0.613174 0.0254982 -24.047734  0.0000
## SpikeSetSecond       -0.142583 0.0388906  -3.666248  0.0004
## SpikeSetThird        -0.286015 0.1405229  -2.035361  0.0442
## qPCRefficiency       -6.136524 2.7871131  -2.201749  0.0298
## 
##  Correlation: 
##                      (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA SpkStS
## VariableKitEZFood     0.070                                                 
## VariableKitMastitis   0.408  0.090                                          
## VariableKitPfood      0.458  0.100  0.646                                   
## VariableKitPSoilP     0.183  0.040  0.259  0.290                            
## VariableKitPviralDNA -0.004  0.061  0.409  0.458  0.183                     
## VariableKitZymoDNA    0.421  0.092  0.594  0.666  0.266  0.421              
## SpikeSetSecond       -0.899 -0.053 -0.370 -0.415 -0.166  0.000 -0.381       
## SpikeSetThird        -0.993 -0.070 -0.409 -0.459 -0.184  0.000 -0.422  0.918
## qPCRefficiency       -1.000 -0.071 -0.412 -0.462 -0.185  0.000 -0.425  0.897
##                      SpkStT
## VariableKitEZFood          
## VariableKitMastitis        
## VariableKitPfood           
## VariableKitPSoilP          
## VariableKitPviralDNA       
## VariableKitZymoDNA         
## SpikeSetSecond             
## SpikeSetThird              
## qPCRefficiency        0.993
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.50453616 -0.57224613  0.02251345  0.79650637  1.52273963 
## 
## Residual standard error: 0.07104128 
## Degrees of freedom: 120 total; 110 residual
AIC(mod.Bovine.all)
## [1] -134.968
# Interaction terms cannot be fitted here: dropout (failure to amplify in some
# samples) leaves empty cells, and gls does not tolerate missing cells in
# interaction terms.

# Simpler candidate: kit is the only fixed effect, still with a separate
# residual SD for each kit.
mod3 <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit,
  data = Bovine.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod3)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit 
##   Data: Bovine.InnOnly 
##         AIC       BIC   logLik
##   -131.4578 -93.27441 79.72892
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##   COREDNA    EZFood  Mastitis     Pfood    PSoilP PviralDNA   ZymoDNA 
## 1.0000000 7.2595670 0.6542400 0.9721864 2.9316260 1.3382655 0.7720084 
## 
## Coefficients:
##                          Value  Std.Error   t-value p-value
## (Intercept)           5.447426 0.01927334 282.64048   0e+00
## VariableKitEZFood    -0.739742 0.17244196  -4.28980   0e+00
## VariableKitMastitis  -0.082230 0.02303168  -3.57030   5e-04
## VariableKitPfood     -0.583819 0.02688024 -21.71927   0e+00
## VariableKitPSoilP    -1.397357 0.05969893 -23.40674   0e+00
## VariableKitPviralDNA -0.248313 0.03219833  -7.71199   0e+00
## VariableKitZymoDNA   -0.637029 0.02434854 -26.16292   0e+00
## 
##  Correlation: 
##                      (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VariableKitEZFood    -0.112                                   
## VariableKitMastitis  -0.837  0.094                            
## VariableKitPfood     -0.717  0.080  0.600                     
## VariableKitPSoilP    -0.323  0.036  0.270  0.231              
## VariableKitPviralDNA -0.599  0.067  0.501  0.429  0.193       
## VariableKitZymoDNA   -0.792  0.088  0.662  0.568  0.256  0.474
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.5993333 -0.6143524  0.1093653  0.7539501  2.2432653 
## 
## Residual standard error: 0.08176985 
## Degrees of freedom: 120 total; 113 residual
# Compare the candidate models by AIC (lower is better) and keep the preferred
# one as mod.Bovine.best for the emmeans and figure code that follows.
# NOTE(review): the gls models were fit by REML (see the summary headers) and
# differ in their fixed effects. REML likelihoods, and therefore these AIC
# values, are not comparable across different fixed-effect structures, nor with
# the AIC of the lm() fit. Consider refitting the gls candidates with
# method = "ML" for this comparison and confirm the ranking still holds.
AIC(mod.Bovine)
## [1] -128.8078
AIC(mod.Bovine.all) # best model
## [1] -134.968
AIC(mod3) 
## [1] -131.4578
# The model with kit, spike set and qPCR efficiency is carried forward.
mod.Bovine.best <- mod.Bovine.all
AIC (mod.Bovine.best)
## [1] -134.968
# Homoscedastic linear model, shown for reference.
AIC (m_Bovine.LogCopiespermLofMilk)
## [1] -13.54697
AIC (mod3)
## [1] -131.4578
summary(mod3)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit 
##   Data: Bovine.InnOnly 
##         AIC       BIC   logLik
##   -131.4578 -93.27441 79.72892
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##   COREDNA    EZFood  Mastitis     Pfood    PSoilP PviralDNA   ZymoDNA 
## 1.0000000 7.2595670 0.6542400 0.9721864 2.9316260 1.3382655 0.7720084 
## 
## Coefficients:
##                          Value  Std.Error   t-value p-value
## (Intercept)           5.447426 0.01927334 282.64048   0e+00
## VariableKitEZFood    -0.739742 0.17244196  -4.28980   0e+00
## VariableKitMastitis  -0.082230 0.02303168  -3.57030   5e-04
## VariableKitPfood     -0.583819 0.02688024 -21.71927   0e+00
## VariableKitPSoilP    -1.397357 0.05969893 -23.40674   0e+00
## VariableKitPviralDNA -0.248313 0.03219833  -7.71199   0e+00
## VariableKitZymoDNA   -0.637029 0.02434854 -26.16292   0e+00
## 
##  Correlation: 
##                      (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VariableKitEZFood    -0.112                                   
## VariableKitMastitis  -0.837  0.094                            
## VariableKitPfood     -0.717  0.080  0.600                     
## VariableKitPSoilP    -0.323  0.036  0.270  0.231              
## VariableKitPviralDNA -0.599  0.067  0.501  0.429  0.193       
## VariableKitZymoDNA   -0.792  0.088  0.662  0.568  0.256  0.474
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.5993333 -0.6143524  0.1093653  0.7539501  2.2432653 
## 
## Residual standard error: 0.08176985 
## Degrees of freedom: 120 total; 113 residual
summary(m_Bovine.LogCopiespermLofMilk)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Bovine.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.13707 -0.06085  0.00067  0.06704  0.54510 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           5.47765    0.05874  93.255  < 2e-16 ***
## VariableKitEZFood    -0.75758    0.08288  -9.141 3.39e-15 ***
## VariableKitMastitis  -0.08223    0.07292  -1.128 0.261910    
## VariableKitPfood     -0.58382    0.07292  -8.006 1.27e-12 ***
## VariableKitPSoilP    -1.39736    0.07292 -19.162  < 2e-16 ***
## VariableKitPviralDNA -0.24831    0.07292  -3.405 0.000921 ***
## VariableKitZymoDNA   -0.63703    0.07292  -8.736 2.86e-14 ***
## SpikeSetSecond       -0.06591    0.05064  -1.302 0.195747    
## SpikeSetThird        -0.02476    0.04774  -0.519 0.604983    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2188 on 111 degrees of freedom
## Multiple R-squared:  0.8217, Adjusted R-squared:  0.8088 
## F-statistic: 63.94 on 8 and 111 DF,  p-value: < 2.2e-16
# The model not assuming homoscedasticity and including
# VariableKit + SpikeSet + qPCRefficiency is a much better fit than any of the
# alternatives.

# QQ plots of the residuals for the homoscedastic lm and for the chosen gls.
qqnorm(resid(m_Bovine.LogCopiespermLofMilk))
qqline(resid(m_Bovine.LogCopiespermLofMilk))

# For a gls fit, resid() returns raw (type = "response") residuals by default.
# Under the per-kit varIdent variance structure those have a different spread
# in every kit, so they are not expected to follow one normal distribution.
# Normalized residuals are rescaled by the fitted variance function and are the
# appropriate quantity to check against the normal reference line.
qqnorm(resid(mod.Bovine.best, type = "normalized"))
qqline(resid(mod.Bovine.best, type = "normalized"))

Bovine Figure

library(emmeans)
# Estimated marginal means per kit together with the pairwise kit contrasts
# (the printed output reports Tukey adjustment for the family of 7 estimates).
mod.Bovine.best_emmeans <- emmeans(
  mod.Bovine.best,
  specs = pairwise ~ VariableKit,
  mode = "df.error"
)

# Compact letter display with upper-case letters; this first version is sorted
# and includes the detailed comparison output.
mod.Bovine.best_cld <- CLD(
  mod.Bovine.best_emmeans[["emmeans"]],
  Letters = LETTERS,
  sort = TRUE,
  details = TRUE
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Letters-only table; this is the object the figures use for the group labels.
mod.Bovine.best_cld_letters <- CLD(
  mod.Bovine.best_emmeans[["emmeans"]],
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Bovine.best_cld_letters
##  VariableKit emmean     SE  df lower.CL upper.CL .group
##  PSoilP        4.06 0.0552 103     3.95     4.17  A    
##  EZFood        4.70 0.1752 103     4.35     5.04   BC  
##  ZymoDNA       4.82 0.0164 103     4.79     4.85   B   
##  Pfood         4.87 0.0130 103     4.85     4.90   B   
##  PviralDNA     5.18 0.0211 103     5.14     5.23    C  
##  Mastitis      5.37 0.0176 103     5.34     5.41     D 
##  COREDNA       5.43 0.0180 103     5.40     5.47     D 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: df.error 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05
# Kit-level estimated marginal means as a plain data frame, printed so the
# values can be copied into other plotting software.
data.frame(
  summary(
    emmeans(mod.Bovine.best, ~ VariableKit, mode = "df.error")
  )
)
##   VariableKit   emmean         SE  df lower.CL upper.CL
## 1     COREDNA 5.432929 0.01799250 103 5.397245 5.468613
## 2      EZFood 4.695164 0.17517917 103 4.347738 5.042591
## 3    Mastitis 5.374554 0.01761333 103 5.339622 5.409486
## 4       Pfood 4.872965 0.01302185 103 4.847139 4.898791
## 5      PSoilP 4.059427 0.05522585 103 3.949899 4.168954
## 6   PviralDNA 5.184616 0.02109142 103 5.142786 5.226446
## 7     ZymoDNA 4.819755 0.01644564 103 4.787139 4.852371
# Same estimates, with confidence limits and tests included (infer = TRUE).
emmeans(mod.Bovine.best, ~ VariableKit, mode = "df.error") %>%
  summary(infer = TRUE)
##  VariableKit emmean     SE  df lower.CL upper.CL t.ratio p.value
##  COREDNA       5.43 0.0180 103     5.40     5.47 301.955 <.0001 
##  EZFood        4.70 0.1752 103     4.35     5.04  26.802 <.0001 
##  Mastitis      5.37 0.0176 103     5.34     5.41 305.141 <.0001 
##  Pfood         4.87 0.0130 103     4.85     4.90 374.214 <.0001 
##  PSoilP        4.06 0.0552 103     3.95     4.17  73.506 <.0001 
##  PviralDNA     5.18 0.0211 103     5.14     5.23 245.816 <.0001 
##  ZymoDNA       4.82 0.0164 103     4.79     4.85 293.072 <.0001 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: df.error 
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data

# Raw observations: only the three columns the plots need. They are selected
# by name rather than by position (previously c(25, 42, 4)) so the code keeps
# working if the column order of the input file changes. The resulting column
# order may differ from the positional version; the plotting code visible in
# this report refers to the columns by name only.
mod_df1_Bovine.rawdata <- Bovine.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]

# Model-based estimates: marginal mean and confidence limits per kit.
mod_df2_Bovine.best.model <- emmeans(
  mod.Bovine.best, ~ VariableKit,
  mode = "df.error"
) %>%
  summary() %>%
  data.frame()

# Jittered raw data with the estimated means and their confidence intervals.
ggplot() +
  geom_jitter(
    data = mod_df1_Bovine.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod_df2_Bovine.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod_df2_Bovine.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )

# Making the plot pretty

# Colour assigned to each extraction kit.
# NOTE(review): "Pfood" is "#D2326B" here but "#D2338B" in the palette defined
# under "Manuscript Figures" - confirm which shade is intended.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")

# Raw data (coloured by kit, shaped by spike set) overlaid with the model
# estimates, their confidence intervals and the compact-letter-display groups.
ggplot() +
  geom_jitter(
    data = mod_df1_Bovine.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  geom_errorbar(
    data = mod_df2_Bovine.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bovine.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bovine.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  #ylim(3.5, 6.5)+
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  theme_bw() +
  ggtitle("Bovine Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  # The colour scale is added exactly once. Adding it twice made ggplot2 warn
  # that the existing colour scale was being replaced.
  scale_color_manual(values = Colors) +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5)) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood",  "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.


The model that does not assume homoscedasticity and includes VariableKit + SpikeSet + qPCRefficiency was chosen.
qPCRefficiency is forced into all final models.

Formula: mod.Bovine.all = nlme::gls(LogCopiespermLofMilk ~ VariableKit + SpikeSet + qPCRefficiency, data = Bovine.InnOnly,weights = varIdent(form = ~1 | VariableKit))

AIC(mod.Bovine.all) # best model
-134.968


Other Models for Reference:

mod.Bovine = nlme::gls(LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Bovine.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod.Bovine)
-128.8078

mod3 = nlme::gls(LogCopiespermLofMilk ~ VariableKit, data = Bovine.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod3)
-131.4578

Previously chosen Linear Model that assumed homoscedasticity for reference:
m_Bovine.LogCopiespermLofMilk <- lm( LogCopiespermLofMilk ~ VariableKit + SpikeSet, data=Bovine.InnOnly )
AIC(m_Bovine.LogCopiespermLofMilk)
-13.54697



Manuscript Figures: Bovine


# Bovine: milk samples and controls (everything except NP40-inoculated milk)
Bovine.Inn.Ctrl <- filter(Bovine, VariableSampleType != "NP40InoculatedMilk")
dim(Bovine.Inn.Ctrl)
## [1] 240  42
# Kit colour palette for the manuscript figures.
# NOTE(review): "Pfood" is "#D2338B" here but "#D2326B" in the palette used for
# the exploratory plots - confirm which shade is intended.
Colors <- c(
  "COREDNA" = "#4DB3C7",
  "EZFood" = "#85CA46",
  "Mastitis" = "#F49D00",
  "Pfood" = "#D2338B",
  "PSoilP" = "#1D6E9B",
  "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

# Fix the sample-type order; it becomes the left-to-right order of the facets.
Bovine.Inn.Ctrl$VariableSampleType <- factor(
  Bovine.Inn.Ctrl$VariableSampleType,
  levels = c("InoculatedMilk", "UninoculatedMilk", "NoTemplateControl", "MockCommunity")
)

# Bovine copy numbers for every sample type, one facet per type. Points are
# coloured by kit, use open symbols per spike set, and are dodged within a kit.
# `show.legend` is not an argument of ggplot() and was silently ignored there,
# so it is given only on the layer, where it actually hides the legend.
ggplot(
  data = Bovine.Inn.Ctrl,
  aes(VariableKit, LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
) +
  scale_shape_discrete(solid = FALSE) +
  ylab("Bovine Log10 Copies / mL of Milk") +
  xlab("Kit") +
  geom_point(
    aes(colour = VariableKit),
    size = 2, stroke = .5,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1),
    show.legend = FALSE
  ) +
  facet_wrap(vars(VariableSampleType), nrow = 1) +
  ggtitle("Bovine DNA Copy Numbers - All Samples and Controls") +
  theme_bw() +
  ylim(0, 9) +
  scale_color_manual(values = Colors) +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood",  "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
## Warning: Removed 80 rows containing missing values (geom_point).

# Saves the most recently displayed plot (no plot object is passed).
ggsave("Bovine-AllSamples.TIFF", width = 9, height = 3, units = "in", dpi = 600)
## Warning: Removed 80 rows containing missing values (geom_point).
# Inoculated-milk bovine copy numbers by kit. Open symbols distinguish the
# spike sets, which are dodged side by side within each kit.
ggplot(
  data = Bovine.InnOnly,
  mapping = aes(x = VariableKit, y = LogCopiespermLofMilk, shape = factor(SpikeSet))
) +
  geom_point(
    mapping = aes(colour = VariableKit),
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1),
    size = 2,
    stroke = 1
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  labs(
    x = "Kit",
    y = "Log10 Copies / mL of Milk",
    title = "Bovine DNA Copy Numbers - Inoculated Milk Only"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )

# Plot overlaying model estimates to raw data

# Raw observations: the three columns the figures use, selected by name rather
# than by position (previously c(25, 42, 4)) so a change in the input file's
# column order cannot silently pick the wrong variables.
mod_df1_Bovine.rawdata <- Bovine.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]

# Estimated marginal means and confidence limits per kit, taken from the chosen
# model (mod.Bovine.best). The figures below overlay the letters stored in
# mod.Bovine.best_cld_letters, which were derived from mod.Bovine.best;
# previously the means and intervals came from mod3, so one figure mixed two
# different models.
mod_df2_Bovine.best.model <- emmeans(
  mod.Bovine.best, ~ VariableKit,
  mode = "df.error"
) %>%
  summary() %>%
  data.frame()

# Manuscript figure: jittered raw data (open symbols per spike set, coloured by
# kit) with the model estimates, their confidence intervals and the
# compact-letter-display groups drawn on top.
ggplot() +
  geom_jitter(
    data = mod_df1_Bovine.rawdata,
    mapping = aes(
      x = VariableKit, y = LogCopiespermLofMilk,
      color = VariableKit, shape = SpikeSet
    ),
    size = 2, stroke = 1, width = .2
  ) +
  geom_errorbar(
    data = mod_df2_Bovine.best.model,
    mapping = aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bovine.best.model,
    mapping = aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bovine.best_cld_letters),
    mapping = aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.7, nudge_x = -0.05, fontface = "bold"
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  ylim(3, 6.5) +
  labs(
    x = "Kit",
    y = "Log10 Copies / mL of Milk",
    title = "Bovine DNA Copy Numbers - Inoculated Milk Only"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  )

# Writes the plot displayed last to disk (no plot object is passed).
ggsave("Bovine-Model-Jitter.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)


# Manuscript figure: same overlay as the jitter version, but the raw points are
# dodged by spike set within each kit instead of being randomly jittered.
ggplot() +
  geom_point(
    data = mod_df1_Bovine.rawdata,
    mapping = aes(
      x = VariableKit, y = LogCopiespermLofMilk,
      color = VariableKit, shape = SpikeSet
    ),
    position = position_jitterdodge(jitter.width = 0, dodge.width = .5),
    size = 2, stroke = 1
  ) +
  geom_errorbar(
    data = mod_df2_Bovine.best.model,
    mapping = aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bovine.best.model,
    mapping = aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bovine.best_cld_letters),
    mapping = aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = .7, nudge_x = -0.05, fontface = "bold"
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  ylim(3, 6.5) +
  labs(
    x = "Kit",
    y = "Log10 Copies / mL of Milk",
    title = "Bovine DNA Copy Numbers - Inoculated Milk Only"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  )

# Writes the plot displayed last to disk (no plot object is passed).
ggsave("Bovine-Model.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)

Total Bacterial DNA


#Total Bacterial DNA

#Data File: CleanDNAprepData1.18.19
library(ggplot2)
library(dplyr)
library(emmeans)
library(multcompView)

# Keep only the rows of SampleData that belong to the total bacterial DNA assay
TotalBacterialDNA <- filter(SampleData, Assay == "Total Bacterial DNA")
dim(TotalBacterialDNA)
## [1] 240  42
# Summary statistics per kit and sample type: mean and SD of the log copy
# number (ignoring NAs), the number of NA values, and the group size.

TotalBacterialDNA.summary <- TotalBacterialDNA %>%
  group_by(VariableKit, VariableSampleType) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableKit' (override with `.groups` argument)
# Export the table as tab-separated text.
write.table(TotalBacterialDNA.summary, "TotalBacterialDNA.summary.txt", sep = "\t")
# Plot raw data as boxplots per sample type, coloured by kit.

# Colour assigned to each extraction kit.
# NOTE(review): "Pfood" is "#D2326B" here but "#D2338B" in the palette defined
# further down for the samples-and-controls figure - confirm which is intended.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")

# `z = VariableKit` and `ylab = "Copy Numbers"` were removed from aes(): neither
# is an aesthetic used by geom_boxplot, and the axis title is set by ylab()
# below, so they had no effect on the plot.
ggplot(
  data = TotalBacterialDNA,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = VariableKit)
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_boxplot(lwd = 1) +
  theme_bw() +
  ggtitle("TotalBacterialDNA Copy Numbers") +
  scale_color_manual(values = Colors) +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5)) +
  scale_x_discrete(limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk",  "MockCommunity", "NoTemplateControl"))
## Warning: Removed 22 rows containing non-finite values (stat_boxplot).

# Jittered raw copy numbers per sample type: colour = kit, shape = spike set.
ggplot(
  data = TotalBacterialDNA,
  mapping = aes(
    x = VariableSampleType, y = LogCopiespermLofMilk,
    color = VariableKit, shape = SpikeSet
  )
) +
  geom_jitter(width = 0.25) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl")
  ) +
  labs(
    y = "Log10 Copies / mL of Milk",
    title = "TotalBacterialDNA Copy Numbers"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 22 rows containing missing values (geom_point).

# Jittered raw copy numbers per sample type: colour = spike set, shape = kit.
ggplot(
  data = TotalBacterialDNA,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = SpikeSet, shape = VariableKit)
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_jitter(width = 0.35) +
  # There are 7 kits but the default shape palette stops at 6 values, so the
  # seventh kit was dropped from the plot with a warning. Supplying 7 shapes
  # keeps every kit visible: the first six are the defaults, plus 18 for the
  # seventh.
  scale_shape_manual(values = c(16, 17, 15, 3, 7, 8, 18)) +
  ggtitle("TotalBacterialDNA Copy Numbers") +
  theme_bw() +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5)) +
  scale_x_discrete(limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk",  "MockCommunity", "NoTemplateControl"))
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 58 rows containing missing values (geom_point).

# Milk samples and controls (everything except NP40-inoculated milk)
TotalBacterialDNA.Inn.Ctrl <- filter(
  TotalBacterialDNA,
  VariableSampleType != "NP40InoculatedMilk"
)
dim(TotalBacterialDNA.Inn.Ctrl)
## [1] 240  42
# Kit colour palette for the samples-and-controls figure.
# NOTE(review): "Pfood" is "#D2338B" here but "#D2326B" in the palette defined
# at the start of this section - confirm which shade is intended.
Colors <- c(
  "COREDNA" = "#4DB3C7",
  "EZFood" = "#85CA46",
  "Mastitis" = "#F49D00",
  "Pfood" = "#D2338B",
  "PSoilP" = "#1D6E9B",
  "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

# Fix the sample-type order; it becomes the left-to-right order of the facets.
TotalBacterialDNA.Inn.Ctrl$VariableSampleType <- factor(
  TotalBacterialDNA.Inn.Ctrl$VariableSampleType,
  levels = c("InoculatedMilk", "UninoculatedMilk", "NoTemplateControl", "MockCommunity")
)

 # Bacterial copy numbers for every sample type, one facet per type, coloured
 # by kit with open symbols per spike set.
 # `show.legend = F` used to be passed to ggplot(), where it is not a recognised
 # argument and was silently ignored, so the legend was still drawn. It is now
 # set on the point layer, as in the corresponding Bovine figure.
 # NOTE(review): this hides the legend, which the original call evidently
 # intended - confirm that is the desired appearance.
 ggplot(
   data = TotalBacterialDNA.Inn.Ctrl,
   aes(VariableKit, LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
 ) +
  scale_shape_discrete(solid = FALSE) +
  ylab("Bacteria Log10 Copies / mL of Milk") +
  geom_jitter(width = 0.25, size = 2.5, stroke = 0.7, show.legend = FALSE) +
  facet_wrap(vars(VariableSampleType), nrow = 1) +
  ggtitle("Bacteria DNA Copy Numbers - All Samples and Controls") +
  theme_bw() +
  ylim(0, 9) +
  scale_color_manual(values = Colors) +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5)) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood",  "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
## Warning: Removed 22 rows containing missing values (geom_point).

# Inoculated milk only, keeping rows whose log copy number is above 0.001
# (rows where that comparison is NA are dropped by filter() as well).
TotalBacterialDNA.InnOnly <- filter(
  TotalBacterialDNA,
  VariableSampleType == "InoculatedMilk" & LogCopiespermLofMilk > 0.001
)

# Mean, SD, NA count and group size per sample type, spike set and kit.
TotalBacterialDNA.InnOnly %>%
  group_by(VariableSampleType, VariableSpikeSet, VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableSampleType', 'VariableSpikeSet' (override with `.groups` argument)
##    VariableSampleType VariableSpikeSet VariableKit mean_LogCopiespermLofMilk
## 1      InoculatedMilk            First     COREDNA                  6.565413
## 2      InoculatedMilk            First      EZFood                  4.277616
## 3      InoculatedMilk            First    Mastitis                  6.805970
## 4      InoculatedMilk            First       Pfood                  6.076956
## 5      InoculatedMilk            First      PSoilP                  5.698609
## 6      InoculatedMilk            First   PviralDNA                  6.192312
## 7      InoculatedMilk            First     ZymoDNA                  6.346418
## 8      InoculatedMilk           Second     COREDNA                  7.490835
## 9      InoculatedMilk           Second    Mastitis                  7.668798
## 10     InoculatedMilk           Second       Pfood                  6.631503
## 11     InoculatedMilk           Second      PSoilP                  6.439095
## 12     InoculatedMilk           Second   PviralDNA                  7.121062
## 13     InoculatedMilk           Second     ZymoDNA                  6.697743
## 14     InoculatedMilk            Third     COREDNA                  6.881582
## 15     InoculatedMilk            Third      EZFood                  4.342899
## 16     InoculatedMilk            Third    Mastitis                  8.053032
## 17     InoculatedMilk            Third       Pfood                  7.418800
## 18     InoculatedMilk            Third      PSoilP                  6.795545
## 19     InoculatedMilk            Third   PviralDNA                  7.556803
## 20     InoculatedMilk            Third     ZymoDNA                  7.282966
##        st_dev n_missing n_total
## 1  0.05259444         0       6
## 2  1.45768760         0       6
## 3  0.01589447         0       6
## 4  0.02287900         0       6
## 5  0.11744216         0       6
## 6  0.03599713         0       6
## 7  0.03366165         0       6
## 8  0.02140125         0       6
## 9  0.02543933         0       6
## 10 0.04452933         0       6
## 11 0.15846125         0       6
## 12 0.07745249         0       6
## 13 0.04461625         0       6
## 14 1.74763906         0       5
## 15 2.30314702         0       4
## 16 0.06686248         0       6
## 17 0.03041420         0       6
## 18 0.08899794         0       6
## 19 0.10432742         0       6
## 20 0.12384595         0       6

Model Selection

Linear Models

# Three linear models are compared: kit plus SpikeSet, kit plus qPCRefficiency,
# and kit plus both covariates. The best-fitting one is used as the final model.
# Model 1: kit + SpikeSet.
m_TotalBacterialDNA.LogCopiespermLofMilk1 <- lm(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = TotalBacterialDNA.InnOnly
)
summary(m_TotalBacterialDNA.LogCopiespermLofMilk1)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = TotalBacterialDNA.InnOnly)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2707 -0.1573  0.0449  0.1796  1.8855 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           6.47037    0.17757  36.439  < 2e-16 ***
## VariableKitEZFood    -2.54574    0.26294  -9.682 2.47e-16 ***
## VariableKitMastitis   0.50018    0.21946   2.279  0.02463 *  
## VariableKitPfood     -0.30000    0.21946  -1.367  0.17448    
## VariableKitPSoilP    -0.69800    0.21946  -3.180  0.00192 ** 
## VariableKitPviralDNA -0.05236    0.21946  -0.239  0.81189    
## VariableKitZymoDNA   -0.23338    0.21946  -1.063  0.28998    
## SpikeSetSecond        0.66840    0.15065   4.437 2.21e-05 ***
## SpikeSetThird         0.94775    0.14458   6.555 1.96e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6488 on 108 degrees of freedom
## Multiple R-squared:  0.6727, Adjusted R-squared:  0.6484 
## F-statistic: 27.74 on 8 and 108 DF,  p-value: < 2.2e-16
# Model 2: kit + qPCRefficiency.
m_TotalBacterialDNA.LogCopiespermLofMilk2 <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
  data = TotalBacterialDNA.InnOnly
)
summary(m_TotalBacterialDNA.LogCopiespermLofMilk2)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency, 
##     data = TotalBacterialDNA.InnOnly)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7187 -0.3802 -0.1493  0.5690  2.3270 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           5.45105    0.59965   9.090 5.07e-15 ***
## VariableKitEZFood    -2.45939    0.30825  -7.979 1.60e-12 ***
## VariableKitMastitis   0.59092    0.25322   2.334  0.02145 *  
## VariableKitPfood     -0.20926    0.25322  -0.826  0.41038    
## VariableKitPSoilP    -0.60726    0.25322  -2.398  0.01817 *  
## VariableKitPviralDNA -0.02169    0.25200  -0.086  0.93156    
## VariableKitZymoDNA   -0.14263    0.25322  -0.563  0.57439    
## qPCRefficiency        1.77000    0.65975   2.683  0.00844 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7451 on 109 degrees of freedom
## Multiple R-squared:  0.5642, Adjusted R-squared:  0.5362 
## F-statistic: 20.16 on 7 and 109 DF,  p-value: < 2.2e-16
# Model 3: kit + qPCRefficiency + SpikeSet.
m_TotalBacterialDNA.LogCopiespermLofMilk3 <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = TotalBacterialDNA.InnOnly
)
summary(m_TotalBacterialDNA.LogCopiespermLofMilk3)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = TotalBacterialDNA.InnOnly)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3078 -0.1452  0.0405  0.2129  1.8860 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           7.53077    2.07380   3.631 0.000434 ***
## VariableKitEZFood    -2.60744    0.28994  -8.993 9.65e-15 ***
## VariableKitMastitis   0.45066    0.24043   1.874 0.063607 .  
## VariableKitPfood     -0.34952    0.24043  -1.454 0.148947    
## VariableKitPSoilP    -0.74753    0.24043  -3.109 0.002405 ** 
## VariableKitPviralDNA -0.05442    0.22025  -0.247 0.805317    
## VariableKitZymoDNA   -0.28290    0.24043  -1.177 0.241951    
## qPCRefficiency       -1.39836    2.72463  -0.513 0.608849    
## SpikeSetSecond        1.01868    0.69906   1.457 0.147983    
## SpikeSetThird         1.04245    0.23473   4.441 2.19e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.651 on 107 degrees of freedom
## Multiple R-squared:  0.6735, Adjusted R-squared:  0.646 
## F-statistic: 24.52 on 9 and 107 DF,  p-value: < 2.2e-16
# Sequential F-tests between the three linear models, in the order given.
# NOTE(review): model 1 (kit + SpikeSet) and model 2 (kit + qPCRefficiency) are
# not nested in one another, so the 1-vs-2 row of this table (Df = -1) is not a
# valid nested-model F-test. Only the comparisons against model 3, which
# contains both covariates, are nested. Consider anova(model1, model3) and
# anova(model2, model3) separately, or rely on the AIC values below.
anova(m_TotalBacterialDNA.LogCopiespermLofMilk1, m_TotalBacterialDNA.LogCopiespermLofMilk2, m_TotalBacterialDNA.LogCopiespermLofMilk3)
## Analysis of Variance Table
## 
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    108 45.455                                  
## 2    109 60.514 -1   -15.059 35.536 3.263e-08 ***
## 3    107 45.343  2    15.171 17.900 1.969e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model with qPCRefficiency does not have better fit than model with SpikeSet

AIC(m_TotalBacterialDNA.LogCopiespermLofMilk1)
## [1] 241.4133
AIC(m_TotalBacterialDNA.LogCopiespermLofMilk2)
## [1] 272.8932
AIC(m_TotalBacterialDNA.LogCopiespermLofMilk3)
## [1] 243.1257
# Final model chosen:
# Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
# No difference in fit, so the simplest model was chosen

Mixed effects models - for reference - AIC of linear models is smaller

library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
## 
##     lmList
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
# Mixed model for reference: kit as fixed effect, random intercept per spike set.
model1 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet),
  data = TotalBacterialDNA.InnOnly,
  REML = TRUE
)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
##    Data: TotalBacterialDNA.InnOnly
## 
## REML criterion at convergence: 242.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0127 -0.2393  0.0585  0.3041  2.9078 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  SpikeSet (Intercept) 0.2273   0.4768  
##  Residual             0.4209   0.6487  
## Number of obs: 117, groups:  SpikeSet, 3
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            7.00804    0.31709   3.21399  22.101 0.000127 ***
## VariableKitEZFood     -2.55195    0.26272 108.34313  -9.714  < 2e-16 ***
## VariableKitMastitis    0.50122    0.21946 108.01544   2.284 0.024331 *  
## VariableKitPfood      -0.29896    0.21946 108.01544  -1.362 0.175950    
## VariableKitPSoilP     -0.69696    0.21946 108.01544  -3.176 0.001948 ** 
## VariableKitPviralDNA  -0.05132    0.21946 108.01544  -0.234 0.815553    
## VariableKitZymoDNA    -0.23233    0.21946 108.01544  -1.059 0.292108    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VarblKtEZFd -0.298                                   
## VrblKtMstts -0.356  0.431                            
## VariblKtPfd -0.356  0.431  0.515                     
## VarblKtPSlP -0.356  0.431  0.515  0.515              
## VrblKtPvDNA -0.356  0.431  0.515  0.515  0.515       
## VrblKtZyDNA -0.356  0.431  0.515  0.515  0.515  0.515
# Same mixed model as model1 with qPCRefficiency added as a fixed effect.
model2 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet),
  data = TotalBacterialDNA.InnOnly,
  REML = TRUE
)
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
##    Data: TotalBacterialDNA.InnOnly
## 
## REML criterion at convergence: 239.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0040 -0.2417  0.0572  0.2971  2.8908 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  SpikeSet (Intercept) 0.2930   0.5413  
##  Residual             0.4231   0.6504  
## Number of obs: 117, groups:  SpikeSet, 3
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            6.97517    1.78073   6.85156   3.917  0.00602 ** 
## VariableKitEZFood     -2.54882    0.28021 104.66468  -9.096 6.67e-15 ***
## VariableKitMastitis    0.50236    0.23143 102.87457   2.171  0.03226 *  
## VariableKitPfood      -0.29782    0.23143 102.87457  -1.287  0.20103    
## VariableKitPSoilP     -0.69582    0.23143 102.87457  -3.007  0.00332 ** 
## VariableKitPviralDNA  -0.05148    0.22006 107.41131  -0.234  0.81548    
## VariableKitZymoDNA    -0.23120    0.23143 102.87457  -0.999  0.32015    
## qPCRefficiency         0.03828    2.01965   7.28234   0.019  0.98539    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA
## VarblKtEZFd -0.384                                          
## VrblKtMstts -0.364  0.490                                   
## VariblKtPfd -0.364  0.490  0.561                            
## VarblKtPSlP -0.364  0.490  0.561  0.561                     
## VrblKtPvDNA -0.078  0.410  0.494  0.494  0.494              
## VrblKtZyDNA -0.364  0.490  0.561  0.561  0.561  0.494       
## qPCReffcncy -0.980  0.341  0.310  0.310  0.310  0.014  0.310
# Compare the two mixed models; as the output below reports, anova() refits
# both with ML before comparing them.
anova(model1, model2)
## refitting model(s) with ML (instead of REML)
## Data: TotalBacterialDNA.InnOnly
## Models:
## model1: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## model2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
##        npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## model1    9 250.67 275.53 -116.33   232.67                     
## model2   10 252.56 280.18 -116.28   232.56 0.1137  1      0.736
# AIC of the two mixed models as fitted (REML = TRUE).
# NOTE(review): model1 and model2 differ in their fixed effects, so these
# REML-based AIC values are presumably not comparable with each other or with
# the AIC of the lm fits; the ML-based AIC column of the anova() table above
# looks like the appropriate comparison -- confirm.
AIC (model1)
## [1] 260.691
AIC (model2)
## [1] 259.5033

Final Model

# Final linear model: kit and SpikeSet as fixed effects.
m_TotalBacterialDNA.LogCopiespermLofMilk <- lm(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = TotalBacterialDNA.InnOnly
)
summary(m_TotalBacterialDNA.LogCopiespermLofMilk)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = TotalBacterialDNA.InnOnly)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2707 -0.1573  0.0449  0.1796  1.8855 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           6.47037    0.17757  36.439  < 2e-16 ***
## VariableKitEZFood    -2.54574    0.26294  -9.682 2.47e-16 ***
## VariableKitMastitis   0.50018    0.21946   2.279  0.02463 *  
## VariableKitPfood     -0.30000    0.21946  -1.367  0.17448    
## VariableKitPSoilP    -0.69800    0.21946  -3.180  0.00192 ** 
## VariableKitPviralDNA -0.05236    0.21946  -0.239  0.81189    
## VariableKitZymoDNA   -0.23338    0.21946  -1.063  0.28998    
## SpikeSetSecond        0.66840    0.15065   4.437 2.21e-05 ***
## SpikeSetThird         0.94775    0.14458   6.555 1.96e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6488 on 108 degrees of freedom
## Multiple R-squared:  0.6727, Adjusted R-squared:  0.6484 
## F-statistic: 27.74 on 8 and 108 DF,  p-value: < 2.2e-16
# Residual diagnostics for the final linear model.

# Base-graphics residuals vs predicted values
plot(
  x = predict(m_TotalBacterialDNA.LogCopiespermLofMilk),
  y = resid(m_TotalBacterialDNA.LogCopiespermLofMilk)
)

# Same plot using ggplot2, coloured by kit
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

# Pass the model frame (the rows actually used in the fit) as the plot data
# instead of the lm object itself, so the plot does not depend on ggplot2's
# implicit fortify() conversion of model objects.
ggplot(
  model.frame(m_TotalBacterialDNA.LogCopiespermLofMilk),
  aes(
    x = predict(m_TotalBacterialDNA.LogCopiespermLofMilk),
    y = resid(m_TotalBacterialDNA.LogCopiespermLofMilk),
    color = VariableKit
  )
) +
  geom_point() +
  theme_bw() +
  ggtitle("Total Bacterial DNA Inoculated Only - Model Fit - Residuals vs Predicted") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  # Reference lines at residual = +1 and -1
  geom_hline(yintercept = 1) +
  geom_hline(yintercept = -1)

# Normal Q-Q plot of the residuals
qqnorm(resid(m_TotalBacterialDNA.LogCopiespermLofMilk))
qqline(resid(m_TotalBacterialDNA.LogCopiespermLofMilk))

summary(m_TotalBacterialDNA.LogCopiespermLofMilk)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = TotalBacterialDNA.InnOnly)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2707 -0.1573  0.0449  0.1796  1.8855 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           6.47037    0.17757  36.439  < 2e-16 ***
## VariableKitEZFood    -2.54574    0.26294  -9.682 2.47e-16 ***
## VariableKitMastitis   0.50018    0.21946   2.279  0.02463 *  
## VariableKitPfood     -0.30000    0.21946  -1.367  0.17448    
## VariableKitPSoilP    -0.69800    0.21946  -3.180  0.00192 ** 
## VariableKitPviralDNA -0.05236    0.21946  -0.239  0.81189    
## VariableKitZymoDNA   -0.23338    0.21946  -1.063  0.28998    
## SpikeSetSecond        0.66840    0.15065   4.437 2.21e-05 ***
## SpikeSetThird         0.94775    0.14458   6.555 1.96e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6488 on 108 degrees of freedom
## Multiple R-squared:  0.6727, Adjusted R-squared:  0.6484 
## F-statistic: 27.74 on 8 and 108 DF,  p-value: < 2.2e-16
#Many large residuals were identified, most belong to EZFood

# Attach the final model's residuals to the data as column `resid`.
TotalBacterialDNA.InnOnly[["resid"]] <-
  resid(m_TotalBacterialDNA.LogCopiespermLofMilk)

# Count, per kit, the observations whose residual lies outside [-1, 1].
TotalBacterialDNA.InnOnly %>%
  filter(resid > 1 | resid < -1) %>%
  select(VariableKit, resid) %>%
  group_by(VariableKit) %>%
  summarize(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
##   VariableKit     n
##   <chr>       <int>
## 1 COREDNA         2
## 2 EZFood          8
# Check Tukey-adjusted pairwise comparison of kit estimates
# Estimated marginal means per kit plus all pairwise kit contrasts.
m_TotalBacterialDNA.LogCopiespermLofMilk_emmeans <- emmeans(
  m_TotalBacterialDNA.LogCopiespermLofMilk,
  specs = pairwise ~ VariableKit
)
# Compact letter display of the kit means, labelled with capital letters.
m_TotalBacterialDNA.LogCopiespermLofMilk_cld <- CLD(
  m_TotalBacterialDNA.LogCopiespermLofMilk_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
m_TotalBacterialDNA.LogCopiespermLofMilk_cld
##  VariableKit emmean    SE  df lower.CL upper.CL .group
##  EZFood        4.46 0.210 108     4.05     4.88  A    
##  PSoilP        6.31 0.153 108     6.01     6.61   B   
##  Pfood         6.71 0.153 108     6.41     7.01   BC  
##  ZymoDNA       6.78 0.153 108     6.47     7.08   BC  
##  PviralDNA     6.96 0.153 108     6.65     7.26   BCD 
##  COREDNA       7.01 0.157 108     6.70     7.32    CD 
##  Mastitis      7.51 0.153 108     7.21     7.81     D 
## 
## Results are averaged over the levels of: SpikeSet 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05
# Get fitted values from model to plot with other software
# Print the kit estimated marginal means as a plain data frame.
data.frame(
  summary(
    emmeans(m_TotalBacterialDNA.LogCopiespermLofMilk, ~ VariableKit)
  )
)
##   VariableKit   emmean        SE  df lower.CL upper.CL
## 1     COREDNA 7.009084 0.1574248 108 6.697041 7.321127
## 2      EZFood 4.463344 0.2103444 108 4.046405 4.880283
## 3    Mastitis 7.509267 0.1529121 108 7.206169 7.812365
## 4       Pfood 6.709086 0.1529121 108 6.405988 7.012185
## 5      PSoilP 6.311083 0.1529121 108 6.007985 6.614182
## 6   PviralDNA 6.956726 0.1529121 108 6.653627 7.259824
## 7     ZymoDNA 6.775709 0.1529121 108 6.472611 7.078807
# Plot fitted values from model
# Kit colour palette used by scale_color_manual() below.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# Plot the kit estimated marginal means with their confidence limits and the
# compact-letter-display group labels next to each point.
emmeans(m_TotalBacterialDNA.LogCopiespermLofMilk, ~VariableKit) %>%
  summary() %>%
  data.frame() %>%
  ggplot(aes(x = VariableKit, y = emmean, color = VariableKit)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.5) +
  geom_text(
    data = data.frame(m_TotalBacterialDNA.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`),
    hjust = -.1
  ) +
  labs(y = "Estimated Marginal Means") +
  ggtitle("Total 16S Bacterial DNA Copy Numbers - Inoculated Milk Only") +
  ylim(3.5, 8.5) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )

TotalBacteria - Models not assuming homoscedasticity

# from https://cran.r-project.org/web/packages/emmeans/vignettes/FAQs.html#contents

library(nlme)
# lm chosen: Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet

# GLS version of the chosen linear model with a separate residual standard
# deviation for each kit (varIdent), i.e. not assuming homoscedasticity.
mod.Bacteria <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = TotalBacterialDNA.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod.Bacteria)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit + SpikeSet 
##   Data: TotalBacterialDNA.InnOnly 
##        AIC      BIC   logLik
##   15.55897 58.47307 8.220514
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##    COREDNA     EZFood   Mastitis      Pfood     PSoilP  PviralDNA    ZymoDNA 
## 1.00000000 1.84385607 0.04438187 0.16888223 0.13620445 0.09116882 0.22206408 
## 
## Coefficients:
##                          Value Std.Error  t-value p-value
## (Intercept)           6.322454 0.2400657 26.33634  0.0000
## VariableKitEZFood    -2.518615 0.6243592 -4.03392  0.0001
## VariableKitMastitis   0.491624 0.2400002  2.04843  0.0429
## VariableKitPfood     -0.308557 0.2429852 -1.26986  0.2069
## VariableKitPSoilP    -0.706560 0.2418687 -2.92125  0.0042
## VariableKitPviralDNA -0.060918 0.2407165 -0.25307  0.8007
## VariableKitZymoDNA   -0.241934 0.2452972 -0.98629  0.3262
## SpikeSetSecond        0.835842 0.0210066 39.78950  0.0000
## SpikeSetThird         1.249727 0.0210053 59.49570  0.0000
## 
##  Correlation: 
##                      (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA SpkStS
## VariableKitEZFood    -0.384                                                 
## VariableKitMastitis  -0.998  0.384                                          
## VariableKitPfood     -0.986  0.379  0.986                                   
## VariableKitPSoilP    -0.990  0.381  0.990  0.978                            
## VariableKitPviralDNA -0.995  0.383  0.995  0.983  0.987                     
## VariableKitZymoDNA   -0.976  0.375  0.977  0.965  0.969  0.974              
## SpikeSetSecond       -0.044  0.010  0.000  0.000  0.000  0.000  0.000       
## SpikeSetThird        -0.041  0.002 -0.003 -0.003 -0.003 -0.003 -0.003  0.500
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -3.4641570 -0.7559524  0.2070587  0.7225366  2.3754575 
## 
## Residual standard error: 0.9886226 
## Degrees of freedom: 117 total; 108 residual
# AIC of the previously chosen linear model vs. the heteroscedastic GLS model.
# NOTE(review): gls() was called without `method` and the summary above
# reports a REML fit, whereas the first model is presumably an lm() fit whose
# AIC is ML-based -- the two values are likely not on a comparable scale.
# Consider refitting the GLS model with method = "ML" for this comparison;
# confirm.
AIC(m_TotalBacterialDNA.LogCopiespermLofMilk1)
## [1] 241.4133
AIC(mod.Bacteria)
## [1] 15.55897
# Testing simpler model

# Simpler heteroscedastic GLS model: kit only, no SpikeSet term.
mod3.Bacteria <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit,
  data = TotalBacterialDNA.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod3.Bacteria)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit 
##   Data: TotalBacterialDNA.InnOnly 
##        AIC      BIC    logLik
##   254.6237 292.4305 -113.3119
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##   COREDNA    EZFood  Mastitis     Pfood    PSoilP PviralDNA   ZymoDNA 
## 1.0000000 1.7810698 0.5579628 0.5883981 0.5024870 0.6119406 0.4192329 
## 
## Coefficients:
##                          Value Std.Error   t-value p-value
## (Intercept)           6.985023 0.2338778 29.866127  0.0000
## VariableKitEZFood    -2.681294 0.5913342 -4.534312  0.0000
## VariableKitMastitis   0.524243 0.2660484  1.970482  0.0513
## VariableKitPfood     -0.275937 0.2694144 -1.024210  0.3080
## VariableKitPSoilP    -0.673940 0.2602741 -2.589347  0.0109
## VariableKitPviralDNA -0.028298 0.2721103 -0.103993  0.9174
## VariableKitZymoDNA   -0.209314 0.2525438 -0.828824  0.4090
## 
##  Correlation: 
##                      (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VariableKitEZFood    -0.396                                   
## VariableKitMastitis  -0.879  0.348                            
## VariableKitPfood     -0.868  0.343  0.763                     
## VariableKitPSoilP    -0.899  0.355  0.790  0.780              
## VariableKitPviralDNA -0.859  0.340  0.756  0.746  0.772       
## VariableKitZymoDNA   -0.926  0.366  0.814  0.804  0.832  0.796
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.9426303 -1.0190259  0.1486182  0.8902093  1.5164719 
## 
## Residual standard error: 0.9643028 
## Degrees of freedom: 117 total; 110 residual
# AIC of the linear model and of the two heteroscedastic GLS models.
# NOTE(review): both GLS summaries above report REML fits and the two GLS
# models differ in their fixed effects, so these AIC values are presumably not
# directly comparable (with each other or with the lm AIC); an ML refit
# (method = "ML") looks like the appropriate basis -- confirm.
AIC(m_TotalBacterialDNA.LogCopiespermLofMilk1)
## [1] 241.4133
AIC(mod.Bacteria) #mod.Bacteria is best model (including SpikeSet)
## [1] 15.55897
AIC(mod3.Bacteria) 
## [1] 254.6237
# Model forcing qPCRefficiency

# Heteroscedastic GLS model with qPCRefficiency forced in alongside kit and
# SpikeSet.
mod.Bacteria.all <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet + qPCRefficiency,
  data = TotalBacterialDNA.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod.Bacteria.all)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit + SpikeSet + qPCRefficiency 
##   Data: TotalBacterialDNA.InnOnly 
##        AIC      BIC   logLik
##   10.10053 55.53862 11.94973
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##    COREDNA     EZFood   Mastitis      Pfood     PSoilP  PviralDNA    ZymoDNA 
## 1.00000000 1.81740972 0.04922275 0.16045698 0.12966370 0.07614551 0.20596264 
## 
## Coefficients:
##                          Value Std.Error  t-value p-value
## (Intercept)           7.287423 0.4049640 17.99524  0.0000
## VariableKitEZFood    -2.573698 0.6244850 -4.12131  0.0001
## VariableKitMastitis   0.445932 0.2434423  1.83178  0.0698
## VariableKitPfood     -0.354248 0.2460925 -1.43949  0.1529
## VariableKitPSoilP    -0.752251 0.2450807 -3.06940  0.0027
## VariableKitPviralDNA -0.063134 0.2433529 -0.25943  0.7958
## VariableKitZymoDNA   -0.287625 0.2479698 -1.15992  0.2487
## SpikeSetSecond        1.164434 0.1126872 10.33333  0.0000
## SpikeSetThird         1.348696 0.0378059 35.67421  0.0000
## qPCRefficiency       -1.280854 0.4293443 -2.98328  0.0035
## 
##  Correlation: 
##                      (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA SpkStS
## VariableKitEZFood    -0.256                                                 
## VariableKitMastitis  -0.647  0.389                                          
## VariableKitPfood     -0.640  0.385  0.987                                   
## VariableKitPSoilP    -0.643  0.387  0.991  0.980                            
## VariableKitPviralDNA -0.600  0.388  0.994  0.984  0.988                     
## VariableKitZymoDNA   -0.636  0.382  0.980  0.969  0.973  0.976              
## SpikeSetSecond        0.780 -0.026 -0.061 -0.061 -0.061 -0.003 -0.060       
## SpikeSetThird         0.640 -0.022 -0.053 -0.052 -0.052 -0.004 -0.052  0.859
## qPCRefficiency       -0.800  0.029  0.063  0.062  0.062  0.003  0.061 -0.981
##                      SpkStT
## VariableKitEZFood          
## VariableKitMastitis        
## VariableKitPfood           
## VariableKitPSoilP          
## VariableKitPviralDNA       
## VariableKitZymoDNA         
## SpikeSetSecond             
## SpikeSetThird              
## qPCRefficiency       -0.819
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -3.4624783 -0.7305480  0.1434528  0.8160697  2.3154272 
## 
## Residual standard error: 1.000626 
## Degrees of freedom: 117 total; 107 residual
# AIC of the GLS model that also includes qPCRefficiency
AIC(mod.Bacteria.all)
## [1] 10.10053
#mod.Bacteria.all not assuming homoscedasticity and including SpikeSet and qPCRefficiency is a better fit
# NOTE(review): this AIC comes from a REML fit (see the summary above) and is
# compared against models with different fixed effects; presumably the
# comparison should be repeated on ML fits -- confirm.

# Alias the selected model under the name used by the emmeans/figure code
mod.Bacteria.best <- mod.Bacteria.all
AIC(mod.Bacteria.best)
## [1] 10.10053

Bacteria Figure

library(emmeans)
# Check Tukey-adjusted pairwise comparison of kit estimates
# Estimated marginal means per kit plus all pairwise kit contrasts, using the
# "df.error" degrees-of-freedom mode.
mod.Bacteria.best_emmeans <- emmeans(
  mod.Bacteria.best,
  specs = pairwise ~ VariableKit,
  mode = "df.error"
)

# Compact letter display, detailed version (sorted, with contrast details)
mod.Bacteria.best_cld <- CLD(
  mod.Bacteria.best_emmeans$emmeans,
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Letters-only table, used to label the figures
mod.Bacteria.best_cld_letters <- CLD(
  mod.Bacteria.best_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Bacteria.best_cld_letters
##  VariableKit emmean      SE  df lower.CL upper.CL .group
##  EZFood       4.486 0.57513 100    3.344    5.627  A    
##  PSoilP       6.307 0.03061 100    6.246    6.368   B   
##  Pfood        6.705 0.03787 100    6.630    6.780    C  
##  ZymoDNA      6.772 0.04860 100    6.675    6.868    C  
##  PviralDNA    6.996 0.02229 100    6.952    7.040     D 
##  COREDNA      7.059 0.24308 100    6.577    7.542    CDE
##  Mastitis     7.505 0.01169 100    7.482    7.528      E
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: df.error 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05
# Get fitted values from model to plot with other software
# Print the kit estimated marginal means of the selected GLS model as a plain
# data frame.
data.frame(
  summary(
    emmeans(mod.Bacteria.best, ~ VariableKit, mode = "df.error")
  )
)
##   VariableKit   emmean         SE  df lower.CL upper.CL
## 1     COREDNA 7.059235 0.24308315 100 6.576965 7.541505
## 2      EZFood 4.485538 0.57512880 100 3.344498 5.626577
## 3    Mastitis 7.505168 0.01169020 100 7.481975 7.528361
## 4       Pfood 6.704987 0.03786867 100 6.629857 6.780118
## 5      PSoilP 6.306984 0.03061200 100 6.246251 6.367718
## 6   PviralDNA 6.996101 0.02228736 100 6.951883 7.040318
## 7     ZymoDNA 6.771610 0.04859566 100 6.675197 6.868022
# Get summary
summary(emmeans(mod.Bacteria.best,~ VariableKit,mode = "df.error"), infer=TRUE)
##  VariableKit emmean      SE  df lower.CL upper.CL t.ratio p.value
##  COREDNA      7.059 0.24308 100    6.577    7.542  29.040 <.0001 
##  EZFood       4.486 0.57513 100    3.344    5.627   7.799 <.0001 
##  Mastitis     7.505 0.01169 100    7.482    7.528 642.005 <.0001 
##  Pfood        6.705 0.03787 100    6.630    6.780 177.059 <.0001 
##  PSoilP       6.307 0.03061 100    6.246    6.368 206.030 <.0001 
##  PviralDNA    6.996 0.02229 100    6.952    7.040 313.904 <.0001 
##  ZymoDNA      6.772 0.04860 100    6.675    6.868 139.346 <.0001 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: df.error 
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data
# Raw observations for the overlay plots. The three columns the plots map are
# selected by name: positional indices silently pick different columns as soon
# as a column is added, removed or reordered upstream.
mod_df1_Bacteria.rawdata <- TotalBacterialDNA.InnOnly[
  c("LogCopiespermLofMilk", "SpikeSet", "VariableKit")
]

# Kit estimated marginal means (with confidence limits) from the selected model
mod_df2_Bacteria.best.model <- emmeans(
  mod.Bacteria.best, ~VariableKit,
  mode = "df.error"
) %>%
  summary() %>%
  data.frame()

# Quick look: jittered raw data with the model estimates and their error bars
ggplot() +
  geom_jitter(
    data = mod_df1_Bacteria.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )

# Making the plot pretty

# Kit colour palette used by scale_color_manual() below.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

# Styled overlay: raw data coloured by kit and shaped by SpikeSet, with the
# model estimates, their error bars and the compact-letter-display labels.
ggplot() +
  geom_jitter(
    data = mod_df1_Bacteria.rawdata,
    aes(
      x = VariableKit, y = LogCopiespermLofMilk,
      color = VariableKit, shape = SpikeSet
    )
  ) +
  geom_errorbar(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bacteria.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  # ylim(3.5, 6.5) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  theme_bw() +
  ggtitle("Bacteria Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  # The colour scale is added exactly once; adding it a second time only makes
  # ggplot2 replace the first one and emit a "Scale for 'colour' is already
  # present" message.
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  )


Model not assuming homoscedasticity and including VariableKit + SpikeSet + qPCRefficiency was chosen.
qPCRefficiency is forced into all final models

Formula: mod.Bacteria.all = nlme::gls(LogCopiespermLofMilk ~ VariableKit + SpikeSet + qPCRefficiency, data = TotalBacterialDNA.InnOnly, weights = varIdent(form = ~1 | VariableKit))

AIC(mod.Bacteria.all) # best model
10.10053


Other Models for Reference:

mod.Bacteria = nlme::gls(LogCopiespermLofMilk ~ VariableKit + SpikeSet, data=TotalBacterialDNA.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod.Bacteria) 15.55897

mod3.Bacteria = nlme::gls(LogCopiespermLofMilk ~ VariableKit, data=TotalBacterialDNA.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod3.Bacteria)
254.6237

Previously chosen Linear Model that assumed homoscedasticity for reference:
m_TotalBacterialDNA.LogCopiespermLofMilk <- lm( LogCopiespermLofMilk ~ VariableKit + SpikeSet, data=TotalBacterialDNA.InnOnly )
AIC(m_TotalBacterialDNA.LogCopiespermLofMilk1)
241.4133



Manuscript Figures: Total Bacteria


# Total Bacteria: Milk Data and Controls
# Total-bacteria rows for the manuscript figure: everything except the
# NP40-inoculated milk samples.
Bacteria.Inn.Ctrl <- TotalBacterialDNA %>% filter(VariableSampleType!="NP40InoculatedMilk")
dim(Bacteria.Inn.Ctrl)
## [1] 240  42
# Kit colour palette. Pfood uses "#D2326B", the value used by every other
# palette definition in this file, so the kit colours match across figures.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)


# Fix the order of the sample types (this is the order of the facet panels)
Bacteria.Inn.Ctrl$VariableSampleType <- factor(Bacteria.Inn.Ctrl$VariableSampleType, levels=c('InoculatedMilk', 'UninoculatedMilk', 'NoTemplateControl', 'MockCommunity'))

# Manuscript figure: log copy numbers per kit, one panel per sample type,
# coloured by kit with open symbols per SpikeSet (legend hidden).
ggplot(
  data = Bacteria.Inn.Ctrl,
  aes(VariableKit, LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
) +
  geom_point(
    aes(colour = VariableKit),
    size = 2,
    stroke = .5,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1),
    show.legend = FALSE
  ) +
  facet_wrap(vars(VariableSampleType), nrow = 1) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  ) +
  ylim(0, 9) +
  xlab("Kit") +
  ylab("Bacteria Log10 Copies / mL of Milk") +
  ggtitle("Total Bacteria DNA Copy Numbers - All Samples and Controls") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  )
## Warning: Removed 22 rows containing missing values (geom_point).

# Save the plot just drawn (ggsave() defaults to the last plot displayed)
ggsave(
  "Bacteria-AllSamples.TIFF",
  width = 9, height = 3, units = "in", dpi = 600
)
## Warning: Removed 22 rows containing missing values (geom_point).
# Inoculated-milk samples only: log copy numbers per kit, coloured by kit,
# with open symbols dodged by SpikeSet.
ggplot(
  TotalBacterialDNA.InnOnly,
  aes(VariableKit, LogCopiespermLofMilk, shape = factor(SpikeSet))
) +
  geom_point(
    aes(colour = VariableKit),
    size = 2,
    stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1)
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  ) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Total Bacteria DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )

# Plot overlaying model estimates to raw data
# The three columns the plots map are selected by name: positional indices
# silently pick different columns as soon as a column is added, removed or
# reordered upstream.
mod_df1_Bacteria.rawdata <- TotalBacterialDNA.InnOnly[
  c("LogCopiespermLofMilk", "SpikeSet", "VariableKit")
]

# Kit estimated marginal means (with confidence limits) from the selected model
mod_df2_Bacteria.best.model <- emmeans(
  mod.Bacteria.best, ~VariableKit,
  mode = "df.error"
) %>%
  summary() %>%
  data.frame()

# Jittered raw data (open symbols per SpikeSet, coloured by kit) with the
# model estimates, their error bars and the compact-letter-display labels
# drawn 3 units below each estimate.
ggplot() +
  geom_jitter(
    data = mod_df1_Bacteria.rawdata,
    aes(
      x = VariableKit, y = LogCopiespermLofMilk,
      color = VariableKit, shape = SpikeSet
    ),
    size = 2, stroke = 1, width = .2
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bacteria.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = -3, nudge_x = -0.05, fontface = "bold"
  ) +
  # ylim(3.5, 6.5) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Bacteria DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  )

# Save the plot just drawn (ggsave() defaults to the last plot displayed)
ggsave("Bacteria-Model-Jitter.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)

# Same overlay as the jitter version, but the raw points are dodged by
# SpikeSet instead of jittered and the letter labels sit 2 units below the
# estimates.
ggplot() +
  geom_point(
    data = mod_df1_Bacteria.rawdata,
    aes(
      x = VariableKit, y = LogCopiespermLofMilk,
      color = VariableKit, shape = SpikeSet
    ),
    size = 2,
    stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = .5)
  ) +
  geom_errorbar(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bacteria.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = -2, nudge_x = -0.05, fontface = "bold"
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  ) +
  # ylim(3.5, 6.5) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Bacteria DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  )

# Save the plot just drawn (ggsave() defaults to the last plot displayed)
ggsave(
  "Bacteria-Model.TIFF",
  width = 7.5, height = 3.5, units = "in", dpi = 600
)

Bacillus


Bacillus wiedmannii Copy Numbers

#Data File: CleanDNAprepData1.18.19
library(ggplot2)
library(dplyr)
library(emmeans)
library(multcompView)

#Filter Subset from Sample Data
# Keep only the rows of the Bacillus wiedmannii assay and report the size of
# the subset.
Bacillus <- filter(SampleData, Assay == "Bacillus wiedmannii")
dim(Bacillus)
## [1] 240  42
#Summary Statistics

# Summary of LogCopiespermLofMilk for each kit x sample-type combination:
# mean and standard deviation (ignoring missing values), number of missing
# values, and total number of rows. Returned as a plain data frame.
Bacillus.summary <- Bacillus %>%
  group_by(VariableKit, VariableSampleType) %>%
  summarize(
    # TRUE is spelled out: the shorthand `T` is an ordinary variable that can
    # be reassigned, which would silently change na.rm.
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableKit' (override with `.groups` argument)
write.table (Bacillus.summary, "Bacillus.summary.txt", sep="\t" )
# Box plots of the raw log copy numbers by sample type, one box per kit.
# Kit colour palette shared by the Bacillus plots.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# Only x, y and colour are mapped: the former `z = VariableKit` and
# `ylab = "Copy Numbers"` entries were not aesthetics used by geom_boxplot()
# and had no effect (the axis title comes from ylab() below).
# NOTE(review): SampleData is built with NP40InoculatedMilk rows filtered out,
# so that position on the x axis is expected to stay empty; confirm it is
# kept on purpose.
ggplot(
  data = Bacillus,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = VariableKit)
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_boxplot(lwd = 1) +
  theme_bw() +
  ggtitle("Bacillus Copy Numbers") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl"))
## Warning: Removed 87 rows containing non-finite values (stat_boxplot).

# Jittered raw log copy numbers by sample type: colour encodes the kit and
# point shape encodes the spike set.
ggplot(
  data = Bacillus,
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    color = VariableKit,
    shape = SpikeSet
  )
) +
  geom_jitter(width = 0.25) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl")) +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Bacillus Copy Numbers") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 87 rows containing missing values (geom_point).

# Jittered raw log copy numbers by sample type: colour encodes the spike set
# and point shape encodes the kit.
ggplot(
  data = Bacillus,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = SpikeSet, shape = VariableKit)
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_jitter(width = 0.35) +
  # VariableKit has seven levels but the default shape palette only provides
  # six, so the points of one kit were silently dropped. Shapes are therefore
  # assigned manually: the first six match the default palette and the seventh
  # kit gets shape 4.
  scale_shape_manual(values = c(16, 17, 15, 3, 7, 8, 4)) +
  ggtitle("Bacillus Copy Numbers") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl"))
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 111 rows containing missing values (geom_point).

# Inoculated Milk Data
# Keep inoculated-milk rows with a log copy number above 0.001. filter() also
# drops rows where the condition is NA, i.e. rows with a missing copy number.
Bacillus.InnOnly <- Bacillus %>%
  filter(VariableSampleType == "InoculatedMilk", LogCopiespermLofMilk > 0.001)

# Mean, standard deviation and counts per sample type, spike set and kit.
# TRUE is spelled out because `T` is a plain variable that can be reassigned.
Bacillus.InnOnly %>%
  group_by(VariableSampleType, VariableSpikeSet, VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableSampleType', 'VariableSpikeSet' (override with `.groups` argument)
##    VariableSampleType VariableSpikeSet VariableKit mean_LogCopiespermLofMilk
## 1      InoculatedMilk            First     COREDNA                  6.126799
## 2      InoculatedMilk            First      EZFood                  4.199089
## 3      InoculatedMilk            First    Mastitis                  6.541652
## 4      InoculatedMilk            First       Pfood                  5.417196
## 5      InoculatedMilk            First      PSoilP                  5.311014
## 6      InoculatedMilk            First   PviralDNA                  5.518817
## 7      InoculatedMilk            First     ZymoDNA                  6.028139
## 8      InoculatedMilk           Second     COREDNA                  6.433043
## 9      InoculatedMilk           Second    Mastitis                  6.469291
## 10     InoculatedMilk           Second       Pfood                  5.662830
## 11     InoculatedMilk           Second      PSoilP                  5.317932
## 12     InoculatedMilk           Second   PviralDNA                  6.091386
## 13     InoculatedMilk           Second     ZymoDNA                  5.641826
## 14     InoculatedMilk            Third     COREDNA                  7.019606
## 15     InoculatedMilk            Third      EZFood                  4.500444
## 16     InoculatedMilk            Third    Mastitis                  7.012577
## 17     InoculatedMilk            Third       Pfood                  6.400803
## 18     InoculatedMilk            Third      PSoilP                  6.186386
## 19     InoculatedMilk            Third   PviralDNA                  6.674293
## 20     InoculatedMilk            Third     ZymoDNA                  6.397461
##        st_dev n_missing n_total
## 1  0.05946479         0       6
## 2  0.99412039         0       5
## 3  0.02719433         0       6
## 4  0.92199266         0       6
## 5  0.18316303         0       6
## 6  0.08786240         0       6
## 7  0.03599635         0       6
## 8  0.09141025         0       6
## 9  0.01607210         0       6
## 10 0.08101734         0       6
## 11 0.17244727         0       6
## 12 0.08779476         0       6
## 13 0.02664997         0       6
## 14 0.02640080         0       6
## 15 0.18655080         0       3
## 16 0.04008723         0       6
## 17 0.03383823         0       5
## 18 0.12209311         0       6
## 19 0.07478131         0       6
## 20 0.09433067         0       6

Model Selection

Linear Models

# Three linear models were compared, each with VariableKit plus one of: SpikeSet only, qPCRefficiency only, or both as covariates. The best-fitting model was used as the final model.

# Model 1: kit effect with SpikeSet as the only covariate.
m_Bacillus.LogCopiespermLofMilk1 <- lm(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = Bacillus.InnOnly
)
summary(m_Bacillus.LogCopiespermLofMilk1)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Bacillus.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.00590 -0.12365  0.02376  0.14482  1.18301 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           6.24938    0.08909  70.144  < 2e-16 ***
## VariableKitEZFood    -2.21559    0.14237 -15.562  < 2e-16 ***
## VariableKitMastitis   0.14802    0.11004   1.345 0.181452    
## VariableKitPfood     -0.70594    0.11168  -6.321 6.26e-09 ***
## VariableKitPSoilP    -0.92137    0.11004  -8.373 2.50e-13 ***
## VariableKitPviralDNA -0.43165    0.11004  -3.923 0.000156 ***
## VariableKitZymoDNA   -0.50401    0.11004  -4.580 1.27e-05 ***
## SpikeSetSecond        0.08916    0.07689   1.160 0.248844    
## SpikeSetThird         0.74214    0.07455   9.955  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3301 on 106 degrees of freedom
## Multiple R-squared:  0.8191, Adjusted R-squared:  0.8054 
## F-statistic: 59.98 on 8 and 106 DF,  p-value: < 2.2e-16
# Model 2: kit effect with qPCRefficiency as the only covariate.
m_Bacillus.LogCopiespermLofMilk2 <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
  data = Bacillus.InnOnly
)
summary(m_Bacillus.LogCopiespermLofMilk2)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency, 
##     data = Bacillus.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.25228 -0.24690 -0.04217  0.38180  0.90585 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.5051     0.3725  17.466  < 2e-16 ***
## VariableKitEZFood     -2.2118     0.2066 -10.704  < 2e-16 ***
## VariableKitMastitis    0.1484     0.1586   0.936  0.35141    
## VariableKitPfood      -0.7329     0.1609  -4.555 1.39e-05 ***
## VariableKitPSoilP     -0.9210     0.1586  -5.807 6.60e-08 ***
## VariableKitPviralDNA  -0.4316     0.1585  -2.724  0.00753 ** 
## VariableKitZymoDNA    -0.5036     0.1586  -3.175  0.00196 ** 
## qPCRefficiency         0.0284     0.4722   0.060  0.95215    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4753 on 107 degrees of freedom
## Multiple R-squared:  0.6214, Adjusted R-squared:  0.5966 
## F-statistic: 25.08 on 7 and 107 DF,  p-value: < 2.2e-16
# Model 3: kit effect with both qPCRefficiency and SpikeSet as covariates.
m_Bacillus.LogCopiespermLofMilk3 <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Bacillus.InnOnly
)
summary(m_Bacillus.LogCopiespermLofMilk3)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = Bacillus.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.09840 -0.07968  0.02759  0.11999  1.13063 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           12.986714   1.969535   6.594 1.77e-09 ***
## VariableKitEZFood     -2.411255   0.147215 -16.379  < 2e-16 ***
## VariableKitMastitis   -0.004911   0.113983  -0.043 0.965719    
## VariableKitPfood      -0.861498   0.115718  -7.445 2.81e-11 ***
## VariableKitPSoilP     -1.074306   0.113983  -9.425 1.20e-15 ***
## VariableKitPviralDNA  -0.431651   0.104867  -4.116 7.69e-05 ***
## VariableKitZymoDNA    -0.656942   0.113983  -5.763 8.33e-08 ***
## qPCRefficiency       -10.473575   3.058912  -3.424 0.000881 ***
## SpikeSetSecond         2.526123   0.715502   3.531 0.000617 ***
## SpikeSetThird          1.727832   0.296519   5.827 6.25e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3146 on 105 degrees of freedom
## Multiple R-squared:  0.8372, Adjusted R-squared:  0.8233 
## F-statistic: 60.02 on 9 and 105 DF,  p-value: < 2.2e-16
# Fit of model with both qPCRefficiency and SpikeSet is slightly better than fit of model with SpikeSet only
# anova() compares each model with the one listed immediately before it.
# NOTE(review): models 1 and 2 are not nested (each has a covariate the other
# lacks), so the model 2 vs model 1 row is not a valid F test. Only the
# model 3 vs model 2 row compares nested models. Model 1 vs model 3 (adding
# qPCRefficiency to the SpikeSet model) is not tested by this call; consider
# a separate nested comparison.
anova(m_Bacillus.LogCopiespermLofMilk1, m_Bacillus.LogCopiespermLofMilk2, m_Bacillus.LogCopiespermLofMilk3)
## Analysis of Variance Table
## 
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
##   Res.Df    RSS Df Sum of Sq       F    Pr(>F)    
## 1    106 11.553                                   
## 2    107 24.177 -1   -12.624 127.549 < 2.2e-16 ***
## 3    105 10.392  2    13.784  69.636 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# AIC of the three candidate linear models; the model with the lowest value is
# taken as the best fit.
AIC(m_Bacillus.LogCopiespermLofMilk1)
## [1] 82.0835
AIC(m_Bacillus.LogCopiespermLofMilk2)
## [1] 165.0082
AIC(m_Bacillus.LogCopiespermLofMilk3)
## [1] 71.91107
# Final model chosen:
# Model 3:  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet

Mixed effects models - for reference

library(lme4)
library(lmerTest)

# Mixed model 1: kit as fixed effect, random intercept per SpikeSet, fitted by
# REML. Uses `<-` for assignment, consistent with the rest of the file.
# NOTE(review): the random effect has only three SpikeSet groups (see the
# summary output), so its variance estimate rests on very few levels.
model1 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet),
  data = Bacillus.InnOnly,
  REML = TRUE
)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
##    Data: Bacillus.InnOnly
## 
## REML criterion at convergence: 94.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.0884 -0.3868  0.0730  0.4565  3.5691 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  SpikeSet (Intercept) 0.1614   0.4018  
##  Residual             0.1090   0.3301  
## Number of obs: 115, groups:  SpikeSet, 3
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)            6.5265     0.2447   2.3902  26.675 0.000514 ***
## VariableKitEZFood     -2.2154     0.1423 106.1124 -15.565  < 2e-16 ***
## VariableKitMastitis    0.1480     0.1100 106.0017   1.345 0.181451    
## VariableKitPfood      -0.7064     0.1117 106.0037  -6.325 6.14e-09 ***
## VariableKitPSoilP     -0.9214     0.1100 106.0017  -8.373 2.50e-13 ***
## VariableKitPviralDNA  -0.4317     0.1100 106.0017  -3.923 0.000156 ***
## VariableKitZymoDNA    -0.5040     0.1100 106.0017  -4.580 1.27e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VarblKtEZFd -0.174                                   
## VrblKtMstts -0.225  0.387                            
## VariblKtPfd -0.222  0.380  0.493                     
## VarblKtPSlP -0.225  0.387  0.500  0.493              
## VrblKtPvDNA -0.225  0.387  0.500  0.493  0.500       
## VrblKtZyDNA -0.225  0.387  0.500  0.493  0.500  0.500
# Mixed model 2: kit and qPCRefficiency as fixed effects, random intercept per
# SpikeSet, fitted by REML. Uses `<-` for assignment, consistent with the rest
# of the file.
model2 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet),
  data = Bacillus.InnOnly,
  REML = TRUE
)
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
##    Data: Bacillus.InnOnly
## 
## REML criterion at convergence: 83.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.6253 -0.2423  0.0675  0.3670  3.6148 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  SpikeSet (Intercept) 1.32448  1.1509  
##  Residual             0.09911  0.3148  
## Number of obs: 115, groups:  SpikeSet, 3
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)           13.12759    2.21117  23.01830   5.937 4.70e-06 ***
## VariableKitEZFood     -2.38084    0.14566 106.96631 -16.346  < 2e-16 ***
## VariableKitMastitis    0.01988    0.11263 106.85800   0.177  0.86023    
## VariableKitPfood      -0.83631    0.11434 106.84974  -7.314 5.00e-11 ***
## VariableKitPSoilP     -1.04951    0.11263 106.85800  -9.318 1.80e-15 ***
## VariableKitPviralDNA  -0.43165    0.10494 104.68351  -4.113 7.79e-05 ***
## VariableKitZymoDNA    -0.63215    0.11263 106.85800  -5.612 1.59e-07 ***
## qPCRefficiency        -8.77574    2.80201  41.55916  -3.132  0.00318 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA
## VarblKtEZFd -0.362                                          
## VrblKtMstts -0.368  0.467                                   
## VariblKtPfd -0.369  0.462  0.560                            
## VarblKtPSlP -0.368  0.467  0.566  0.560                     
## VrblKtPvDNA -0.024  0.360  0.466  0.459  0.466              
## VrblKtZyDNA -0.368  0.467  0.566  0.560  0.566  0.466       
## qPCReffcncy -0.953  0.362  0.363  0.364  0.363  0.000  0.363
# Test whether adding qPCRefficiency improves the mixed model. As the message
# printed below shows, anova() refits both models with ML before comparing.
anova(model1, model2)
## refitting model(s) with ML (instead of REML)
## Data: Bacillus.InnOnly
## Models:
## model1: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## model2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
##        npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## model1    9 94.236 118.94 -38.118   76.236                       
## model2   10 90.603 118.05 -35.301   70.603 5.6334  1    0.01762 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# NOTE(review): model1 and model2 were fitted with REML = TRUE, and the AIC
# values returned here differ from the ML-based AIC column of the anova()
# table above, so they appear to be REML-based. A REML-based AIC is not
# directly comparable with the AIC of an lm() fit; use the ML values from the
# anova() table when comparing against the linear model.
AIC (model1)
## [1] 112.5384
AIC (model2)
## [1] 103.7505
AIC(m_Bacillus.LogCopiespermLofMilk3) #still has better fit
## [1] 71.91107

Final Model

# Final linear model: same formula and data as model 3 above, refitted under
# the name used by the diagnostics and emmeans steps that follow.
m_Bacillus.LogCopiespermLofMilk <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Bacillus.InnOnly
)
summary(m_Bacillus.LogCopiespermLofMilk)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = Bacillus.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.09840 -0.07968  0.02759  0.11999  1.13063 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           12.986714   1.969535   6.594 1.77e-09 ***
## VariableKitEZFood     -2.411255   0.147215 -16.379  < 2e-16 ***
## VariableKitMastitis   -0.004911   0.113983  -0.043 0.965719    
## VariableKitPfood      -0.861498   0.115718  -7.445 2.81e-11 ***
## VariableKitPSoilP     -1.074306   0.113983  -9.425 1.20e-15 ***
## VariableKitPviralDNA  -0.431651   0.104867  -4.116 7.69e-05 ***
## VariableKitZymoDNA    -0.656942   0.113983  -5.763 8.33e-08 ***
## qPCRefficiency       -10.473575   3.058912  -3.424 0.000881 ***
## SpikeSetSecond         2.526123   0.715502   3.531 0.000617 ***
## SpikeSetThird          1.727832   0.296519   5.827 6.25e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3146 on 105 degrees of freedom
## Multiple R-squared:  0.8372, Adjusted R-squared:  0.8233 
## F-statistic: 60.02 on 9 and 105 DF,  p-value: < 2.2e-16
# Residual diagnostics for the final linear model.

# Base graphics: residuals against predicted values.
plot(
  x = predict(m_Bacillus.LogCopiespermLofMilk),
  y = resid(m_Bacillus.LogCopiespermLofMilk)
)

# Same plot with ggplot2, coloured by kit, with reference lines at +/- 1.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

ggplot(
  m_Bacillus.LogCopiespermLofMilk,
  aes(
    x = predict(m_Bacillus.LogCopiespermLofMilk),
    y = resid(m_Bacillus.LogCopiespermLofMilk),
    color = VariableKit
  )
) +
  geom_point() +
  theme_bw() +
  ggtitle("Bacillus Innoculated Only - Model Fit - Residuals vs Predicted") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  geom_hline(yintercept = c(1, -1))

# Normal Q-Q plot of the residuals with its reference line.
qqnorm(resid(m_Bacillus.LogCopiespermLofMilk))
qqline(resid(m_Bacillus.LogCopiespermLofMilk))

# Print the model summary again next to the diagnostics.
summary(m_Bacillus.LogCopiespermLofMilk)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = Bacillus.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.09840 -0.07968  0.02759  0.11999  1.13063 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           12.986714   1.969535   6.594 1.77e-09 ***
## VariableKitEZFood     -2.411255   0.147215 -16.379  < 2e-16 ***
## VariableKitMastitis   -0.004911   0.113983  -0.043 0.965719    
## VariableKitPfood      -0.861498   0.115718  -7.445 2.81e-11 ***
## VariableKitPSoilP     -1.074306   0.113983  -9.425 1.20e-15 ***
## VariableKitPviralDNA  -0.431651   0.104867  -4.116 7.69e-05 ***
## VariableKitZymoDNA    -0.656942   0.113983  -5.763 8.33e-08 ***
## qPCRefficiency       -10.473575   3.058912  -3.424 0.000881 ***
## SpikeSetSecond         2.526123   0.715502   3.531 0.000617 ***
## SpikeSetThird          1.727832   0.296519   5.827 6.25e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3146 on 105 degrees of freedom
## Multiple R-squared:  0.8372, Adjusted R-squared:  0.8233 
## F-statistic: 60.02 on 9 and 105 DF,  p-value: < 2.2e-16
# A few large residuals were identified; most belong to EZFood.
# Store the final model's residuals alongside the data, then count per kit the
# observations whose absolute residual exceeds 1.
Bacillus.InnOnly[["resid"]] <- residuals(m_Bacillus.LogCopiespermLofMilk)
Bacillus.InnOnly %>%
  filter(abs(resid) > 1) %>%
  select(VariableKit, resid) %>%
  group_by(VariableKit) %>%
  summarize(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
##   VariableKit     n
##   <chr>       <int>
## 1 EZFood          2
## 2 Pfood           1
# Check Tukey-adjusted pairwise comparison of kit estimates
# emmeans() with `pairwise ~ VariableKit` returns the per-kit estimated
# marginal means ($emmeans) together with the pairwise contrasts.
m_Bacillus.LogCopiespermLofMilk_emmeans <- emmeans(m_Bacillus.LogCopiespermLofMilk,pairwise~VariableKit)
# Use compact letter display for convenience
# Kits that share a letter in `.group` are not significantly different.
# NOTE(review): CLD() is deprecated, as the warning below states; the
# suggested replacements are pwpp() or multcomp::cld(). multcomp is not
# loaded in this file, so switching needs that dependency added first.
m_Bacillus.LogCopiespermLofMilk_cld <- CLD(m_Bacillus.LogCopiespermLofMilk_emmeans$emmeans,
                                         Letters=LETTERS)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
m_Bacillus.LogCopiespermLofMilk_cld
##  VariableKit emmean     SE  df lower.CL upper.CL .group
##  EZFood        4.28 0.1141 105     4.05     4.50  A    
##  PSoilP        5.61 0.0742 105     5.47     5.76   B   
##  Pfood         5.83 0.0764 105     5.67     5.98   BC  
##  ZymoDNA       6.03 0.0742 105     5.88     6.18    CD 
##  PviralDNA     6.26 0.0878 105     6.08     6.43     D 
##  Mastitis      6.68 0.0742 105     6.54     6.83      E
##  COREDNA       6.69 0.0878 105     6.51     6.86      E
## 
## Results are averaged over the levels of: SpikeSet 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05
# Per-kit estimated marginal means of the final model as a plain data frame,
# so the values can be plotted with other software.
data.frame(summary(emmeans(m_Bacillus.LogCopiespermLofMilk, ~ VariableKit)))
##   VariableKit   emmean         SE  df lower.CL upper.CL
## 1     COREDNA 6.687205 0.08776095 105 6.513191 6.861219
## 2      EZFood 4.275950 0.11407101 105 4.049768 4.502131
## 3    Mastitis 6.682295 0.07418720 105 6.535195 6.829394
## 4       Pfood 5.825707 0.07635634 105 5.674307 5.977108
## 5      PSoilP 5.612899 0.07418720 105 5.465799 5.759998
## 6   PviralDNA 6.255554 0.08776095 105 6.081540 6.429568
## 7     ZymoDNA 6.030263 0.07418720 105 5.883164 6.177363
# Plot the per-kit estimated marginal means of the final model with their
# confidence limits, labelled with the compact-letter-display groups.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
ggplot(
  data.frame(summary(emmeans(m_Bacillus.LogCopiespermLofMilk, ~ VariableKit))),
  aes(x = VariableKit, y = emmean, color = VariableKit)
) +
  geom_point() +
  labs(y = "Estimated Marginal Means") +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.5) +
  # Letters come from the CLD table; y and colour are inherited from the plot.
  geom_text(
    data = data.frame(m_Bacillus.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`),
    hjust = -0.1
  ) +
  theme_bw() +
  ggtitle("Bacillus Copy Numbers - Inoculated Milk Only") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))

Bacillus - Models not assuming homoscedasticity

# from https://cran.r-project.org/web/packages/emmeans/vignettes/FAQs.html#contents

library(nlme)
# Final model chosen:
# Model 3:  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
# m_Bacillus.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Bacillus.InnOnly )

# Generalized least squares version of the final model that allows a separate
# residual standard deviation for each kit (varIdent by VariableKit).
# Uses `<-` for assignment, consistent with the rest of the file.
# NOTE(review): the coefficient correlation table in the summary shows
# qPCRefficiency almost perfectly correlated with the intercept and with both
# SpikeSet terms (|r| > 0.98), which suggests these covariates are nearly
# collinear; interpret their individual coefficients with care.
mod.Bacillus <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Bacillus.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod.Bacillus)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet 
##   Data: Bacillus.InnOnly 
##         AIC       BIC   logLik
##   -48.92209 -3.804763 41.46104
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##    COREDNA     EZFood   Mastitis      Pfood     PSoilP  PviralDNA    ZymoDNA 
##  1.0000000 10.8226076  0.4338227  7.9479602  3.2866606  1.7999703  1.9815334 
## 
## Coefficients:
##                           Value Std.Error    t-value p-value
## (Intercept)           13.157805 0.4998031  26.325978  0.0000
## VariableKitEZFood     -2.418889 0.2708292  -8.931416  0.0000
## VariableKitMastitis   -0.007651 0.0214136  -0.357305  0.7216
## VariableKitPfood      -0.868575 0.1374938  -6.317193  0.0000
## VariableKitPSoilP     -1.077047 0.0582532 -18.489047  0.0000
## VariableKitPviralDNA  -0.431651 0.0342402 -12.606569  0.0000
## VariableKitZymoDNA    -0.659682 0.0386293 -17.077236  0.0000
## qPCRefficiency       -10.661269 0.7808058 -13.654188  0.0000
## SpikeSetSecond         2.542006 0.1867661  13.610639  0.0000
## SpikeSetThird          1.622224 0.0790107  20.531702  0.0000
## 
##  Correlation: 
##                      (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA qPCRff
## VariableKitEZFood    -0.051                                                 
## VariableKitMastitis  -0.558  0.074                                          
## VariableKitPfood     -0.088  0.012  0.139                                   
## VariableKitPSoilP    -0.205  0.027  0.326  0.051                            
## VariableKitPviralDNA -0.016  0.030  0.377  0.059  0.139                     
## VariableKitZymoDNA   -0.309  0.041  0.491  0.077  0.181  0.209              
## qPCRefficiency       -0.999  0.049  0.532  0.084  0.196  0.000  0.295       
## SpikeSetSecond        0.995 -0.047 -0.531 -0.084 -0.195  0.000 -0.294 -0.997
## SpikeSetThird         0.977 -0.047 -0.522 -0.082 -0.192  0.000 -0.289 -0.981
##                      SpkStS
## VariableKitEZFood          
## VariableKitMastitis        
## VariableKitPfood           
## VariableKitPSoilP          
## VariableKitPviralDNA       
## VariableKitZymoDNA         
## qPCRefficiency             
## SpikeSetSecond             
## SpikeSetThird         0.986
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -3.8274028 -0.6343972  0.1141199  0.6254420  1.6446439 
## 
## Residual standard error: 0.0705496 
## Degrees of freedom: 115 total; 105 residual
# Compare the homoscedastic linear model with the per-kit-variance gls model.
# NOTE(review): the gls summary reports a REML fit, while the lm() AIC is based
# on the ordinary likelihood, so the two values are not on the same scale.
# Consider refitting the gls model by ML before comparing AIC.
AIC(m_Bacillus.LogCopiespermLofMilk3)
## [1] 71.91107
AIC(mod.Bacillus)
## [1] -48.92209
# Testing simpler model: kit only, still with a separate residual standard
# deviation per kit. Uses `<-` for assignment, consistent with the rest of the
# file.

mod3.Bacillus <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit,
  data = Bacillus.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod3.Bacillus)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit 
##   Data: Bacillus.InnOnly 
##        AIC      BIC    logLik
##   167.8347 205.3845 -69.91734
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##   COREDNA    EZFood  Mastitis     Pfood    PSoilP PviralDNA   ZymoDNA 
## 1.0000000 2.0050656 0.6461050 1.7238368 1.1640521 1.2739433 0.8354346 
## 
## Coefficients:
##                          Value  Std.Error  t-value p-value
## (Intercept)           6.526483 0.09097939 71.73584  0.0000
## VariableKitEZFood    -2.214386 0.28835800 -7.67929  0.0000
## VariableKitMastitis   0.148024 0.10831713  1.36658  0.1746
## VariableKitPfood     -0.733296 0.18525902 -3.95822  0.0001
## VariableKitPSoilP    -0.921372 0.13961757 -6.59925  0.0000
## VariableKitPviralDNA -0.431651 0.14734537 -2.92952  0.0041
## VariableKitZymoDNA   -0.504007 0.11855110 -4.25139  0.0000
## 
##  Correlation: 
##                      (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VariableKitEZFood    -0.316                                   
## VariableKitMastitis  -0.840  0.265                            
## VariableKitPfood     -0.491  0.155  0.412                     
## VariableKitPSoilP    -0.652  0.206  0.547  0.320              
## VariableKitPviralDNA -0.617  0.195  0.519  0.303  0.402       
## VariableKitZymoDNA   -0.767  0.242  0.645  0.377  0.500  0.474
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -3.38996911 -0.80202011 -0.09719026  0.97395001  1.62044688 
## 
## Residual standard error: 0.3859929 
## Degrees of freedom: 115 total; 108 residual
# AIC of the linear model and of the two gls models; the lowest value is taken
# as the best fit.
# NOTE(review): both gls models are REML fits with different fixed effects
# (with and without qPCRefficiency + SpikeSet). REML-based criteria are
# generally not comparable across different fixed-effect structures, nor with
# the lm() AIC. Confirm the ranking with ML refits.
AIC(m_Bacillus.LogCopiespermLofMilk3)
## [1] 71.91107
AIC(mod.Bacillus)
## [1] -48.92209
AIC(mod3.Bacillus) #mod.Bacillus is best model (including qPCRefficiency and SpikeSet)
## [1] 167.8347
# mod.Bacillus (not assuming homoscedasticity, including qPCRefficiency and SpikeSet) has the better fit

# Keep the selected model under a separate name.
mod.Bacillus.best <- mod.Bacillus

Final Bacillus Figure

# Estimated marginal means per kit plus Tukey-adjusted pairwise contrasts
mod.Bacillus_emmeans <- emmeans(mod.Bacillus, pairwise ~ VariableKit)
# Compact letter display of the kit means, sorted; details = TRUE also returns
# the table of pairwise comparisons
mod.Bacillus_cld <- CLD(
  mod.Bacillus_emmeans$emmeans,
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Letters-only table, used to label the figure
mod.Bacillus_cld_letters <- CLD(
  mod.Bacillus_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Bacillus_cld
## $emmeans
##  VariableKit emmean       SE    df lower.CL upper.CL .group
##  EZFood       4.271 0.269995  7.01    3.633    4.910  A    
##  PSoilP       5.613 0.054656 16.72    5.498    5.728   B   
##  Pfood        5.822 0.135998 15.98    5.533    6.110   BCD 
##  ZymoDNA      6.030 0.032955 15.73    5.960    6.100    C  
##  PviralDNA    6.258 0.032240 14.08    6.189    6.328     D 
##  Mastitis     6.682 0.007237 13.38    6.667    6.698      E
##  COREDNA      6.690 0.020496 13.76    6.646    6.734      E
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: satterthwaite 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05 
## 
## $comparisons
##  contrast             estimate     SE    df t.ratio p.value
##  PSoilP - EZFood       1.34184 0.2755  7.59  4.871  0.0151 
##  Pfood - EZFood        1.55031 0.3023 10.71  5.128  0.0047 
##  Pfood - PSoilP        0.20847 0.1466 21.02  1.422  0.7842 
##  ZymoDNA - EZFood      1.75921 0.2720  7.22  6.468  0.0033 
##  ZymoDNA - PSoilP      0.41736 0.0638 27.30  6.540  <.0001 
##  ZymoDNA - Pfood       0.20889 0.1399 17.86  1.493  0.7454 
##  PviralDNA - EZFood    1.98724 0.2720  7.21  7.307  0.0015 
##  PviralDNA - PSoilP    0.64540 0.0633 26.35 10.188  <.0001 
##  PviralDNA - Pfood     0.43692 0.1397 17.75  3.127  0.0712 
##  PviralDNA - ZymoDNA   0.22803 0.0460 29.39  4.962  0.0005 
##  Mastitis - EZFood     2.41124 0.2701  7.02  8.927  0.0005 
##  Mastitis - PSoilP     1.06940 0.0551 17.32 19.399  <.0001 
##  Mastitis - Pfood      0.86092 0.1362 16.07  6.322  0.0002 
##  Mastitis - ZymoDNA    0.65203 0.0337 17.37 19.330  <.0001 
##  Mastitis - PviralDNA  0.42400 0.0328 15.08 12.914  <.0001 
##  COREDNA - EZFood      2.41889 0.2708  7.09  8.931  0.0005 
##  COREDNA - PSoilP      1.07705 0.0583 21.06 18.489  <.0001 
##  COREDNA - Pfood       0.86857 0.1375 16.69  6.317  0.0001 
##  COREDNA - ZymoDNA     0.65968 0.0386 25.28 17.077  <.0001 
##  COREDNA - PviralDNA   0.43165 0.0342 20.74 12.607  <.0001 
##  COREDNA - Mastitis    0.00765 0.0214 16.70  0.357  0.9998 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: satterthwaite 
## P value adjustment: tukey method for comparing a family of 7 estimates
# Kit-level estimated marginal means as a plain data frame, e.g. for plotting
# with other software
data.frame(summary(emmeans(mod.Bacillus, ~ VariableKit)))
##   VariableKit   emmean          SE        df lower.CL upper.CL
## 1     COREDNA 6.690085 0.020495821 13.764217 6.646055 6.734115
## 2      EZFood 4.271197 0.269995430  7.006771 3.632884 4.909509
## 3    Mastitis 6.682434 0.007237233 13.383434 6.666844 6.698024
## 4       Pfood 5.821511 0.135997649 15.983425 5.533184 6.109837
## 5      PSoilP 5.613038 0.054655974 16.724284 5.497579 5.728497
## 6   PviralDNA 6.258434 0.032240343 14.084091 6.189324 6.327544
## 7     ZymoDNA 6.030403 0.032955438 15.730850 5.960443 6.100363
# Same estimates, with t ratios and p-values added (infer = TRUE)
emmeans(mod.Bacillus, ~ VariableKit) %>%
  summary(infer = TRUE)
##  VariableKit emmean       SE    df lower.CL upper.CL t.ratio p.value
##  COREDNA      6.690 0.020496 13.76    6.646    6.734 326.412 <.0001 
##  EZFood       4.271 0.269995  7.01    3.633    4.910  15.819 <.0001 
##  Mastitis     6.682 0.007237 13.38    6.667    6.698 923.341 <.0001 
##  Pfood        5.822 0.135998 15.98    5.533    6.110  42.806 <.0001 
##  PSoilP       5.613 0.054656 16.72    5.498    5.728 102.698 <.0001 
##  PviralDNA    6.258 0.032240 14.08    6.189    6.328 194.118 <.0001 
##  ZymoDNA      6.030 0.032955 15.73    5.960    6.100 182.987 <.0001 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: satterthwaite 
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data
# Raw observations: the plotting columns are selected by name rather than by
# position (previously c(25, 42, 4)), so the subset stays correct if the column
# order of the input file changes.
# NOTE(review): assumes positions 25, 42 and 4 were exactly these three columns
# (the only ones the plots built from this data frame use) -- confirm.
mod.Bacillus_df1_Bacillus.rawdata <-
  Bacillus.InnOnly[c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")]

# Model-based kit estimates (estimated marginal means with confidence limits)
mod.Bacillus_df2_Bacillus.model <- emmeans(mod.Bacillus, ~ VariableKit) %>%
  summary() %>%
  data.frame()

# Quick look: jittered raw data with the model estimate and its interval on top
ggplot() +
  geom_jitter(
    data = mod.Bacillus_df1_Bacillus.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod.Bacillus_df2_Bacillus.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod.Bacillus_df2_Bacillus.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )

# Making the plot pretty

# Fixed kit -> colour mapping used by the figures
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

# Raw data (coloured by kit, shaped by spike set), model estimates with their
# confidence intervals, and the compact-letter-display letters next to each estimate.
ggplot() +
  geom_jitter(
    data = mod.Bacillus_df1_Bacillus.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  geom_errorbar(
    data = mod.Bacillus_df2_Bacillus.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  ) +
  geom_point(
    data = mod.Bacillus_df2_Bacillus.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bacillus_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  ylim(3.0, 8.0) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  theme_bw() +
  ggtitle("Bacillus Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  # The colour scale is added exactly once: adding it twice makes ggplot2
  # replace the first scale and emit a "Scale for 'colour' is already present" message.
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Bacillus Figure

library(emmeans)
# Tukey-adjusted pairwise kit comparisons for the selected model, using the
# "df.error" degrees-of-freedom method
mod.Bacillus.best_emmeans <- emmeans(
  mod.Bacillus.best,
  pairwise ~ VariableKit,
  mode = "df.error"
)

# Compact letter display: sorted version with the comparison details ...
mod.Bacillus.best_cld <- CLD(
  mod.Bacillus.best_emmeans$emmeans,
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# ... and the letters-only table that is printed and used to label the figures
mod.Bacillus.best_cld_letters <- CLD(
  mod.Bacillus.best_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Bacillus.best_cld_letters
##  VariableKit emmean       SE df lower.CL upper.CL .group
##  EZFood       4.271 0.269995 98    3.735    4.807  A    
##  PSoilP       5.613 0.054656 98    5.505    5.722   B   
##  Pfood        5.822 0.135998 98    5.552    6.091   BC  
##  ZymoDNA      6.030 0.032955 98    5.965    6.096    C  
##  PviralDNA    6.258 0.032240 98    6.194    6.322     D 
##  Mastitis     6.682 0.007237 98    6.668    6.697      E
##  COREDNA      6.690 0.020496 98    6.649    6.731      E
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: df.error 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05
# Kit-level estimates of the selected model as a plain data frame, e.g. for
# plotting with other software
data.frame(summary(emmeans(mod.Bacillus.best, ~ VariableKit, mode = "df.error")))
##   VariableKit   emmean          SE df lower.CL upper.CL
## 1     COREDNA 6.690085 0.020495821 98 6.649412 6.730759
## 2      EZFood 4.271197 0.269995430 98 3.735400 4.806994
## 3    Mastitis 6.682434 0.007237233 98 6.668072 6.696796
## 4       Pfood 5.821511 0.135997649 98 5.551628 6.091394
## 5      PSoilP 5.613038 0.054655974 98 5.504575 5.721501
## 6   PviralDNA 6.258434 0.032240343 98 6.194454 6.322414
## 7     ZymoDNA 6.030403 0.032955438 98 5.965004 6.095802
# Same estimates, with t ratios and p-values added (infer = TRUE)
emmeans(mod.Bacillus.best, ~ VariableKit, mode = "df.error") %>%
  summary(infer = TRUE)
##  VariableKit emmean       SE df lower.CL upper.CL t.ratio p.value
##  COREDNA      6.690 0.020496 98    6.649    6.731 326.412 <.0001 
##  EZFood       4.271 0.269995 98    3.735    4.807  15.819 <.0001 
##  Mastitis     6.682 0.007237 98    6.668    6.697 923.341 <.0001 
##  Pfood        5.822 0.135998 98    5.552    6.091  42.806 <.0001 
##  PSoilP       5.613 0.054656 98    5.505    5.722 102.698 <.0001 
##  PviralDNA    6.258 0.032240 98    6.194    6.322 194.118 <.0001 
##  ZymoDNA      6.030 0.032955 98    5.965    6.096 182.987 <.0001 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: df.error 
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data
# Raw observations: the plotting columns are selected by name rather than by
# position (previously c(25, 42, 4)), so the subset stays correct if the column
# order of the input file changes.
# NOTE(review): assumes positions 25, 42 and 4 were exactly these three columns
# (the only ones the plots built from this data frame use) -- confirm.
mod_df1_Bacillus.rawdata <-
  Bacillus.InnOnly[c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")]

# Kit estimates (estimated marginal means with confidence limits) from the selected model
mod_df2_Bacillus.best.model <- emmeans(mod.Bacillus.best, ~ VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()

# Quick look: jittered raw data with the model estimate and its interval on top
ggplot() +
  geom_jitter(
    data = mod_df1_Bacillus.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )

# Making the plot pretty

# Fixed kit -> colour mapping used by the figures
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

# Raw data (coloured by kit, shaped by spike set), estimates of the selected
# model with their confidence intervals, and the compact-letter-display letters.
ggplot() +
  geom_jitter(
    data = mod_df1_Bacillus.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  geom_errorbar(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bacillus.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  # ylim(3.5, 6.5) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  theme_bw() +
  ggtitle("Bacillus Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  # The colour scale is added exactly once: adding it twice makes ggplot2
  # replace the first scale and emit a "Scale for 'colour' is already present" message.
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
***

The model that does not assume homoscedasticity and includes VariableKit, SpikeSet, and qPCRefficiency was chosen.
qPCRefficiency is forced into all final models.

Formula: mod.Bacillus = nlme::gls(LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Bacillus.InnOnly, weights = varIdent(form = ~1 | VariableKit))

AIC(mod.Bacillus)
-48.92209


Other Models for Reference:

Formula: mod3.Bacillus = nlme::gls(LogCopiespermLofMilk ~ VariableKit, data=Bacillus.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod3.Bacillus)
167.8347

Previously chosen Linear Model that assumed homoscedasticity for reference:
Formula: m_Bacillus.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Bacillus.InnOnly )
AIC(m_Bacillus.LogCopiespermLofMilk3)
71.91107



Manuscript Figures: Bacillus


# Bacillus: milk samples and controls (every sample type except NP40InoculatedMilk)
Bacillus.Inn.Ctrl <- filter(Bacillus, VariableSampleType != "NP40InoculatedMilk")
dim(Bacillus.Inn.Ctrl)
## [1] 240  42
# Kit colour palette for the manuscript figures.
# NOTE(review): "Pfood" is "#D2338B" here, whereas the other Colors definitions
# in this file use "#D2326B" -- confirm which shade is intended.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2338B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)


# Fix the order of the sample types, which sets the left-to-right facet order
Bacillus.Inn.Ctrl$VariableSampleType <- factor(
  Bacillus.Inn.Ctrl$VariableSampleType,
  levels = c("InoculatedMilk", "UninoculatedMilk", "NoTemplateControl", "MockCommunity")
)

# One facet per sample type; points coloured by kit, open shapes per spike set,
# dodged (not jittered) within each kit
ggplot(
  data = Bacillus.Inn.Ctrl,
  aes(VariableKit, LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
) +
  geom_point(
    aes(colour = VariableKit),
    size = 2, stroke = .5,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1),
    show.legend = FALSE
  ) +
  facet_wrap(vars(VariableSampleType), nrow = 1) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")) +
  ylim(0, 9) +
  xlab("Kit") +
  ylab("Bacillus Log10 Copies / mL of Milk") +
  ggtitle("Bacillus DNA Copy Numbers - All Samples and Controls") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  )
## Warning: Removed 87 rows containing missing values (geom_point).

# Write the most recently displayed plot to a 9 x 3 inch, 600 dpi TIFF
ggsave(
  "Bacillus-AllSamples.TIFF",
  width = 9,
  height = 3,
  units = "in",
  dpi = 600
)
## Warning: Removed 87 rows containing missing values (geom_point).
# Inoculated milk only: points coloured by kit, open shapes per spike set,
# dodged (not jittered) within each kit
ggplot(
  Bacillus.InnOnly,
  aes(VariableKit, LogCopiespermLofMilk, shape = factor(SpikeSet))
) +
  geom_point(
    aes(colour = VariableKit),
    size = 2, stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1)
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Bacillus DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )

# Plot overlaying model estimates to raw data
# Raw observations, selected by column name rather than by position
# (previously c(25, 42, 4)).
# NOTE(review): assumes those positions were exactly these three columns (the
# only ones the plots built from this data frame use) -- confirm.
mod_df1_Bacillus.rawdata <-
  Bacillus.InnOnly[c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")]

# Kit estimates are taken from the selected Bacillus model (mod.Bacillus.best)
# so that they agree with the letters from mod.Bacillus.best_cld_letters drawn
# in the same figure. This call previously read from `mod3`, an object that is
# not one of the Bacillus models.
mod_df2_Bacillus.best.model <- emmeans(mod.Bacillus.best, ~ VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()

# Jittered raw data (open shapes per spike set, coloured by kit) with model
# estimates, confidence intervals and compact-letter-display letters
ggplot() +
  geom_jitter(
    data = mod_df1_Bacillus.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2, stroke = 1, width = .2
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bacillus.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = -2, nudge_x = -0.05, fontface = "bold"
  ) +
  # ylim(3.5, 6.5) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Bacillus DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))

# Write the plot above to a 7.5 x 3.5 inch, 600 dpi TIFF
ggsave("Bacillus-Model-Jitter.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)


# Same figure as the jittered version, but with the raw points dodged by spike
# set instead of jittered
ggplot() +
  geom_point(
    data = mod_df1_Bacillus.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2, stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = .5)
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bacillus.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = -2, nudge_x = -0.05, fontface = "bold"
  ) +
  # ylim(3.5, 6.5) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Bacillus DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))

# Write the plot above to a 7.5 x 3.5 inch, 600 dpi TIFF
ggsave(
  "Bacillus-Model.TIFF",
  width = 7.5, height = 3.5, units = "in", dpi = 600
)

Listeria


Listeria monocytogenes Copy Numbers

#Data File: CleanDNAprepData1.18.19
library(ggplot2)
library(dplyr)
library(emmeans)
library(multcompView)

# Keep only the Listeria monocytogenes assay rows of the sample data
Listeria <- filter(SampleData, Assay == "Listeria monocytogenes")
dim(Listeria)
## [1] 240  42
# Summary statistics per kit and sample type: mean and SD of the log copy
# number (ignoring NAs), number of missing values and total number of rows

Listeria.summary <- Listeria %>%
  group_by(VariableKit, VariableSampleType) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableKit' (override with `.groups` argument)
# Export the summary as a tab-separated text file
write.table(Listeria.summary, "Listeria.summary.txt", sep = "\t")
# Box plots of the raw Listeria log copy numbers per sample type, one box per kit
# Fixed kit -> colour mapping used by the figures
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# Only x, y and colour are mapped. The former `z = VariableKit` and
# `ylab = "Copy Numbers"` entries were dropped from aes(): geom_boxplot() does
# not use them, and the axis label is set with ylab() below.
ggplot(
  data = Listeria,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = VariableKit)
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_boxplot(lwd = 1) +
  theme_bw() +
  ggtitle("Listeria Copy Numbers") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl"))
## Warning: Removed 130 rows containing non-finite values (stat_boxplot).

# Jittered raw Listeria values per sample type: colour = kit, shape = spike set
ggplot(
  data = Listeria,
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    color = VariableKit,
    shape = SpikeSet
  )
) +
  geom_jitter(width = 0.25) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl")) +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Listeria Copy Numbers") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 130 rows containing missing values (geom_point).

# Jittered raw Listeria values per sample type: colour = spike set, shape = kit
ggplot(
  data = Listeria,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = SpikeSet, shape = VariableKit)
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_jitter(width = 0.35) +
  # ggplot2's default shape palette supplies only six shapes, so with seven
  # kits the points of one kit would be dropped from the plot. Seven shapes
  # are supplied explicitly: the six default solid shapes plus shape 18.
  scale_shape_manual(values = c(16, 17, 15, 3, 7, 8, 18)) +
  ggtitle("Listeria Copy Numbers") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl"))
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 150 rows containing missing values (geom_point).

# Inoculated milk only, keeping rows whose log copy number is above 0.001
Listeria.InnOnly <- filter(
  Listeria,
  VariableSampleType == "InoculatedMilk",
  LogCopiespermLofMilk > 0.001
)
# Mean, SD, number of missing values and row count per spike set and kit
Listeria.InnOnly %>%
  group_by(VariableSampleType, VariableSpikeSet, VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableSampleType', 'VariableSpikeSet' (override with `.groups` argument)
##    VariableSampleType VariableSpikeSet VariableKit mean_LogCopiespermLofMilk
## 1      InoculatedMilk            First     COREDNA                  4.996165
## 2      InoculatedMilk            First    Mastitis                  5.920458
## 3      InoculatedMilk            First       Pfood                  5.261206
## 4      InoculatedMilk            First      PSoilP                  4.589896
## 5      InoculatedMilk            First     ZymoDNA                  5.217074
## 6      InoculatedMilk           Second     COREDNA                  6.764514
## 7      InoculatedMilk           Second    Mastitis                  6.925829
## 8      InoculatedMilk           Second       Pfood                  6.209820
## 9      InoculatedMilk           Second      PSoilP                  5.652350
## 10     InoculatedMilk           Second   PviralDNA                  6.818189
## 11     InoculatedMilk           Second     ZymoDNA                  6.299305
## 12     InoculatedMilk            Third     COREDNA                  6.868625
## 13     InoculatedMilk            Third    Mastitis                  6.906907
## 14     InoculatedMilk            Third       Pfood                  6.311089
## 15     InoculatedMilk            Third      PSoilP                  6.058299
## 16     InoculatedMilk            Third   PviralDNA                  6.871705
## 17     InoculatedMilk            Third     ZymoDNA                  6.483661
##        st_dev n_missing n_total
## 1  0.07118535         0       6
## 2  0.03284588         0       6
## 3  0.05882789         0       6
## 4  0.10368861         0       5
## 5  0.10607334         0       6
## 6  0.06440611         0       6
## 7  0.05305144         0       6
## 8  0.05854870         0       6
## 9  0.28801610         0       4
## 10 0.10167286         0       6
## 11 0.06699653         0       6
## 12 0.03112356         0       6
## 13 0.01320150         0       6
## 14 0.02199862         0       6
## 15 0.11216514         0       6
## 16 0.08573938         0       6
## 17 0.19226732         0       6

Model Selection

Linear Models

# Three linear models are compared: SpikeSet only, qPCRefficiency only, and
# both as covariates. The best-fitting model is used as the final model.
# Model 1: kit + SpikeSet
m_Listeria.LogCopiespermLofMilk1 <- lm(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = Listeria.InnOnly
)
summary(m_Listeria.LogCopiespermLofMilk1)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Listeria.InnOnly)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4850 -0.1104  0.0259  0.1451  0.2849 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           5.37287    0.05033 106.749  < 2e-16 ***
## VariableKitMastitis   0.37463    0.05986   6.258 1.25e-08 ***
## VariableKitPfood     -0.28240    0.05986  -4.718 8.59e-06 ***
## VariableKitPSoilP    -0.75785    0.06285 -12.058  < 2e-16 ***
## VariableKitPviralDNA  0.21673    0.06830   3.173 0.002056 ** 
## VariableKitZymoDNA   -0.20975    0.05986  -3.504 0.000713 ***
## SpikeSetSecond        1.19041    0.04651  25.595  < 2e-16 ***
## SpikeSetThird         1.32028    0.04582  28.815  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1796 on 91 degrees of freedom
## Multiple R-squared:  0.943,  Adjusted R-squared:  0.9386 
## F-statistic: 215.1 on 7 and 91 DF,  p-value: < 2.2e-16
# Model 2: kit + qPCRefficiency
m_Listeria.LogCopiespermLofMilk2 <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
  data = Listeria.InnOnly
)
summary(m_Listeria.LogCopiespermLofMilk2)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency, 
##     data = Listeria.InnOnly)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5353 -0.1140  0.0311  0.1480  0.3386 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           9.95563    0.13712  72.605  < 2e-16 ***
## VariableKitMastitis   0.39940    0.06362   6.278 1.11e-08 ***
## VariableKitPfood     -0.25763    0.06362  -4.050 0.000107 ***
## VariableKitPSoilP    -0.73690    0.06672 -11.045  < 2e-16 ***
## VariableKitPviralDNA  0.24188    0.07241   3.341 0.001209 ** 
## VariableKitZymoDNA   -0.18499    0.06362  -2.908 0.004561 ** 
## qPCRefficiency       -7.31247    0.25287 -28.918  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1908 on 92 degrees of freedom
## Multiple R-squared:  0.9349, Adjusted R-squared:  0.9307 
## F-statistic: 220.3 on 6 and 92 DF,  p-value: < 2.2e-16
# Model 3: kit + qPCRefficiency + SpikeSet
m_Listeria.LogCopiespermLofMilk3 <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Listeria.InnOnly
)
summary(m_Listeria.LogCopiespermLofMilk3)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = Listeria.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38090 -0.06193 -0.00581  0.07481  0.30014 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -38.47062    4.19088  -9.180 1.46e-14 ***
## VariableKitMastitis    0.13712    0.04637   2.957  0.00397 ** 
## VariableKitPfood      -0.51991    0.04637 -11.212  < 2e-16 ***
## VariableKitPSoilP     -0.99555    0.04815 -20.676  < 2e-16 ***
## VariableKitPviralDNA   0.02838    0.04952   0.573  0.56805    
## VariableKitZymoDNA    -0.44726    0.04637  -9.645 1.56e-15 ***
## qPCRefficiency        70.12757    6.70308  10.462  < 2e-16 ***
## SpikeSetSecond        12.17216    1.05015  11.591  < 2e-16 ***
## SpikeSetThird         14.09919    1.22185  11.539  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1213 on 90 degrees of freedom
## Multiple R-squared:  0.9743, Adjusted R-squared:  0.972 
## F-statistic: 426.2 on 8 and 90 DF,  p-value: < 2.2e-16
# Sequential F-tests across the three linear models, in the order given.
# Fit of the model with both qPCRefficiency and SpikeSet is better than fit of the model with SpikeSet only.
# NOTE(review): Model 1 (SpikeSet) and Model 2 (qPCRefficiency) are not nested,
# so the Model 1 -> Model 2 row of the table is not a valid test; only the
# Model 2 -> Model 3 row compares nested models. The Model 1 vs Model 3
# comparison described above would need its own anova() call -- TODO confirm intent.
anova(m_Listeria.LogCopiespermLofMilk1, m_Listeria.LogCopiespermLofMilk2, m_Listeria.LogCopiespermLofMilk3)
## Analysis of Variance Table
## 
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     91 2.9347                                  
## 2     92 3.3504 -1  -0.41571 28.253 7.673e-07 ***
## 3     90 1.3242  2   2.02619 68.853 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# AIC of the three linear models, all fitted with lm() to Listeria.InnOnly (lower is better)
AIC(m_Listeria.LogCopiespermLofMilk1)
## [1] -49.38216
AIC(m_Listeria.LogCopiespermLofMilk2)
## [1] -38.26713
AIC(m_Listeria.LogCopiespermLofMilk3)
## [1] -126.1634
# Final model chosen (lowest AIC of the three):
# Model 3:  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
# m_Listeria.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Listeria.InnOnly )

Mixed effects models - for reference

library(lme4)
library(lmerTest)

# Reference mixed model: kit as fixed effect, random intercept per SpikeSet
model1 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet),
  data = Listeria.InnOnly,
  REML = TRUE
)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
##    Data: Listeria.InnOnly
## 
## REML criterion at convergence: -26.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7104 -0.6104  0.1486  0.8050  1.5907 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  SpikeSet (Intercept) 0.52832  0.7269  
##  Residual             0.03225  0.1796  
## Number of obs: 99, groups:  SpikeSet, 3
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)           6.20977    0.42178  2.03237  14.723 0.004289 ** 
## VariableKitMastitis   0.37463    0.05986 90.99933   6.258 1.25e-08 ***
## VariableKitPfood     -0.28240    0.05986 90.99933  -4.718 8.59e-06 ***
## VariableKitPSoilP    -0.75784    0.06285 90.99999 -12.058  < 2e-16 ***
## VariableKitPviralDNA  0.21761    0.06829 91.01451   3.186 0.001975 ** 
## VariableKitZymoDNA   -0.20975    0.05986 90.99933  -3.504 0.000713 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) VrblKM VrblKP VrKPSP VKPDNA
## VrblKtMstts -0.071                            
## VariblKtPfd -0.071  0.500                     
## VarblKtPSlP -0.068  0.476  0.476              
## VrblKtPvDNA -0.062  0.438  0.438  0.417       
## VrblKtZyDNA -0.071  0.500  0.500  0.476  0.438
# Reference mixed model with qPCRefficiency added as a fixed effect
model2 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet),
  data = Listeria.InnOnly,
  REML = TRUE
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
##    Data: Listeria.InnOnly
## 
## REML criterion at convergence: -94.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.14660 -0.50911 -0.05461  0.61975  2.46790 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  SpikeSet (Intercept) 57.12448 7.5581  
##  Residual              0.01472 0.1213  
## Number of obs: 99, groups:  SpikeSet, 3
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)          -29.11488    5.53685   4.90246  -5.258  0.00350 ** 
## VariableKitMastitis    0.14108    0.04629  90.57640   3.048  0.00302 ** 
## VariableKitPfood      -0.51595    0.04629  90.57640 -11.146  < 2e-16 ***
## VariableKitPSoilP     -0.99158    0.04807  90.53671 -20.627  < 2e-16 ***
## VariableKitPviralDNA   0.03155    0.04947  90.32411   0.638  0.52522    
## VariableKitZymoDNA    -0.44331    0.04629  90.57640  -9.577 2.05e-15 ***
## qPCRefficiency        68.95896    6.65292  91.81209  10.365  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) VrblKM VrblKP VrKPSP VKPDNA VKZDNA
## VrblKtMstts  0.296                                   
## VariblKtPfd  0.296  0.618                            
## VarblKtPSlP  0.286  0.596  0.596                     
## VrblKtPvDNA  0.219  0.533  0.533  0.513              
## VrblKtZyDNA  0.296  0.618  0.618  0.596  0.533       
## qPCReffcncy -0.616 -0.487 -0.487 -0.469 -0.361 -0.487
## optimizer (nloptwrap) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
anova(model1, model2)
## refitting model(s) with ML (instead of REML)
## Data: Listeria.InnOnly
## Models:
## model1: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## model2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
##        npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)    
## model1    8 -30.520  -9.759 23.260   -46.52                         
## model2    9 -90.866 -67.510 54.433  -108.87 62.346  1  2.881e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC (model1)
## [1] -10.19267
AIC (model2)
## [1] -76.85232

Final Model

# Final ordinary-least-squares model for inoculated-milk Listeria counts:
# kit effect adjusted for qPCR efficiency and spike set.
m_Listeria.LogCopiespermLofMilk <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Listeria.InnOnly
)
summary(m_Listeria.LogCopiespermLofMilk)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = Listeria.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38090 -0.06193 -0.00581  0.07481  0.30014 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -38.47062    4.19088  -9.180 1.46e-14 ***
## VariableKitMastitis    0.13712    0.04637   2.957  0.00397 ** 
## VariableKitPfood      -0.51991    0.04637 -11.212  < 2e-16 ***
## VariableKitPSoilP     -0.99555    0.04815 -20.676  < 2e-16 ***
## VariableKitPviralDNA   0.02838    0.04952   0.573  0.56805    
## VariableKitZymoDNA    -0.44726    0.04637  -9.645 1.56e-15 ***
## qPCRefficiency        70.12757    6.70308  10.462  < 2e-16 ***
## SpikeSetSecond        12.17216    1.05015  11.591  < 2e-16 ***
## SpikeSetThird         14.09919    1.22185  11.539  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1213 on 90 degrees of freedom
## Multiple R-squared:  0.9743, Adjusted R-squared:  0.972 
## F-statistic: 426.2 on 8 and 90 DF,  p-value: < 2.2e-16
plot(x=predict(m_Listeria.LogCopiespermLofMilk),y=resid(m_Listeria.LogCopiespermLofMilk))

  # using ggplot2
# Kit -> hex colour lookup used by scale_color_manual().
Colors <- c(
  "COREDNA" = "#4DB3C7",
  "EZFood" = "#85CA46",
  "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B",
  "PSoilP" = "#1D6E9B",
  "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

# Residuals against fitted values for the final lm, coloured by kit, with
# horizontal reference lines drawn at +1 and -1.
ggplot(
  m_Listeria.LogCopiespermLofMilk,
  aes(
    x = predict(m_Listeria.LogCopiespermLofMilk),
    y = resid(m_Listeria.LogCopiespermLofMilk),
    color = VariableKit
  )
) +
  geom_point() +
  geom_hline(yintercept = 1) +
  geom_hline(yintercept = -1) +
  scale_color_manual(values = Colors) +
  ggtitle("Listeria Innoculated Only - Model Fit - Residuals vs Predicted") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  )

  # qqplots
# Normal Q-Q plot of the final model's residuals plus its reference line.
qqnorm(residuals(m_Listeria.LogCopiespermLofMilk))
qqline(residuals(m_Listeria.LogCopiespermLofMilk))

summary(m_Listeria.LogCopiespermLofMilk)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = Listeria.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38090 -0.06193 -0.00581  0.07481  0.30014 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -38.47062    4.19088  -9.180 1.46e-14 ***
## VariableKitMastitis    0.13712    0.04637   2.957  0.00397 ** 
## VariableKitPfood      -0.51991    0.04637 -11.212  < 2e-16 ***
## VariableKitPSoilP     -0.99555    0.04815 -20.676  < 2e-16 ***
## VariableKitPviralDNA   0.02838    0.04952   0.573  0.56805    
## VariableKitZymoDNA    -0.44726    0.04637  -9.645 1.56e-15 ***
## qPCRefficiency        70.12757    6.70308  10.462  < 2e-16 ***
## SpikeSetSecond        12.17216    1.05015  11.591  < 2e-16 ***
## SpikeSetThird         14.09919    1.22185  11.539  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1213 on 90 degrees of freedom
## Multiple R-squared:  0.9743, Adjusted R-squared:  0.972 
## F-statistic: 426.2 on 8 and 90 DF,  p-value: < 2.2e-16
#No large residuals were identified

# Attach the final model's residuals to the data (adds a `resid` column),
# then count, per kit, the observations whose |residual| exceeds 1.
Listeria.InnOnly$resid <- residuals(m_Listeria.LogCopiespermLofMilk)
Listeria.InnOnly %>%
  filter(abs(resid) > 1) %>%
  select(VariableKit, resid) %>%
  group_by(VariableKit) %>%
  summarize(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 0 x 2
## # … with 2 variables: VariableKit <chr>, n <int>
# Estimated marginal means per kit plus all pairwise kit contrasts.
m_Listeria.LogCopiespermLofMilk_emmeans <- emmeans(
  m_Listeria.LogCopiespermLofMilk,
  pairwise ~ VariableKit
)
# Compact letter display of the kit means, using capital letters as labels.
# NOTE(review): emmeans reports CLD() as deprecated when this runs and points
# to multcomp::cld / pwpp as replacements.
m_Listeria.LogCopiespermLofMilk_cld <- CLD(
  m_Listeria.LogCopiespermLofMilk_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
m_Listeria.LogCopiespermLofMilk_cld
##  VariableKit emmean     SE df lower.CL upper.CL .group
##  PSoilP        4.90 0.0610 90     4.78     5.03  A    
##  Pfood         5.38 0.0596 90     5.26     5.50   B   
##  ZymoDNA       5.45 0.0596 90     5.33     5.57   B   
##  COREDNA       5.90 0.0412 90     5.82     5.98    C  
##  PviralDNA     5.93 0.0598 90     5.81     6.05    CD 
##  Mastitis      6.04 0.0596 90     5.92     6.16     D 
## 
## Results are averaged over the levels of: SpikeSet 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 6 estimates 
## significance level used: alpha = 0.05
# Get fitted values from model to plot with other software
# Kit-level estimated marginal means as a plain data frame (printed).
data.frame(
  summary(
    emmeans(m_Listeria.LogCopiespermLofMilk, ~VariableKit)
  )
)
##   VariableKit   emmean         SE df lower.CL upper.CL
## 1     COREDNA 5.899998 0.04115991 90 5.818226 5.981769
## 2    Mastitis 6.037118 0.05961461 90 5.918684 6.155553
## 3       Pfood 5.380092 0.05961461 90 5.261658 5.498527
## 4      PSoilP 4.904452 0.06101657 90 4.783232 5.025673
## 5   PviralDNA 5.928375 0.05981400 90 5.809544 6.047206
## 6     ZymoDNA 5.452734 0.05961461 90 5.334299 5.571169
# Plot fitted values from model
# Kit -> hex colour lookup used by scale_color_manual().
Colors <- c(
  "COREDNA" = "#4DB3C7",
  "EZFood" = "#85CA46",
  "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B",
  "PSoilP" = "#1D6E9B",
  "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# Point-and-interval plot of the kit marginal means from the final lm, with
# the compact-letter-display group letters printed beside each point.
emmeans(m_Listeria.LogCopiespermLofMilk, ~VariableKit) %>%
  summary() %>%
  data.frame() %>%
  ggplot(aes(x = VariableKit, y = emmean, color = VariableKit)) +
  geom_point() +
  labs(y = "Estimated Marginal Means") +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.5) +
  geom_text(
    data = data.frame(m_Listeria.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`),
    hjust = -.1
  ) +
  ggtitle("Listeria Copy Numbers - Inoculated Milk Only") +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )

## Listeria - Models not assuming homoscedasticity

# from https://cran.r-project.org/web/packages/emmeans/vignettes/FAQs.html#contents

library(nlme)
# Final model chosen:
# Model 3:  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
# m_Listeria.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Listeria.InnOnly )

# Same fixed effects as the final lm, fitted by generalized least squares
# with varIdent() weights so that each kit has its own residual standard
# deviation.
mod.Listeria <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Listeria.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod.Listeria)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet 
##   Data: Listeria.InnOnly 
##        AIC       BIC   logLik
##   -136.564 -99.06683 83.28199
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##   COREDNA  Mastitis     Pfood    PSoilP PviralDNA   ZymoDNA 
## 1.0000000 0.8738631 0.8509705 4.3116394 1.5284977 2.6389409 
## 
## Coefficients:
##                          Value Std.Error    t-value p-value
## (Intercept)          -44.37198  2.109774 -21.031628   0.000
## VariableKitMastitis    0.10487  0.021750   4.821762   0.000
## VariableKitPfood      -0.55215  0.021573 -25.594078   0.000
## VariableKitPSoilP     -1.02340  0.068235 -14.998130   0.000
## VariableKitPviralDNA   0.02838  0.031166   0.910509   0.365
## VariableKitZymoDNA    -0.47951  0.040946 -11.710872   0.000
## qPCRefficiency        79.64858  3.376327  23.590305   0.000
## SpikeSetSecond        13.61953  0.527318  25.827934   0.000
## SpikeSetThird         15.72431  0.613679  25.623005   0.000
## 
##  Correlation: 
##                      (Intr) VrblKM VrblKP VrKPSP VKPDNA VKZDNA qPCRff SpkStS
## VariableKitMastitis   0.521                                                 
## VariableKitPfood      0.526  0.692                                          
## VariableKitPSoilP     0.166  0.219  0.221                                   
## VariableKitPviralDNA  0.270  0.429  0.433  0.137                            
## VariableKitZymoDNA    0.277  0.365  0.368  0.116  0.228                     
## qPCRefficiency       -1.000 -0.526 -0.530 -0.168 -0.272 -0.279              
## SpikeSetSecond       -1.000 -0.525 -0.530 -0.167 -0.277 -0.279  0.999       
## SpikeSetThird        -1.000 -0.526 -0.530 -0.168 -0.276 -0.279  1.000  1.000
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.88195463 -0.73035842 -0.04161918  0.79892625  1.90855601 
## 
## Residual standard error: 0.05910791 
## Degrees of freedom: 99 total; 90 residual
# AIC of the homoscedastic lm versus the heteroscedastic gls fit.
# NOTE(review): the gls summary reports "fit by REML", whereas AIC() on an lm
# uses the ordinary (ML) likelihood, so these two values may not be on a
# comparable scale -- consider refitting the gls with method = "ML" before
# ranking the models; confirm.
AIC(m_Listeria.LogCopiespermLofMilk3)
## [1] -126.1634
AIC(mod.Listeria)
## [1] -136.564
# Testing simpler model

# Simpler heteroscedastic model: kit as the only fixed effect, still with a
# separate residual standard deviation per kit.
mod3.Listeria <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit,
  data = Listeria.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod3.Listeria)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit 
##   Data: Listeria.InnOnly 
##        AIC      BIC    logLik
##   170.6132 201.0044 -73.30658
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##   COREDNA  Mastitis     Pfood    PSoilP PviralDNA   ZymoDNA 
## 1.0000000 0.5468388 0.5518105 0.7654328 0.1060306 0.6640781 
## 
## Coefficients:
##                          Value Std.Error   t-value p-value
## (Intercept)           6.209768 0.2087850 29.742411  0.0000
## VariableKitMastitis   0.374630 0.2379629  1.574320  0.1188
## VariableKitPfood     -0.282396 0.2384627 -1.184237  0.2393
## VariableKitPSoilP    -0.749190 0.2724676 -2.749648  0.0072
## VariableKitPviralDNA  0.635179 0.2105381  3.016932  0.0033
## VariableKitZymoDNA   -0.209755 0.2506289 -0.836913  0.4048
## 
##  Correlation: 
##                      (Intr) VrblKM VrblKP VrKPSP VKPDNA
## VariableKitMastitis  -0.877                            
## VariableKitPfood     -0.876  0.768                     
## VariableKitPSoilP    -0.766  0.672  0.671              
## VariableKitPviralDNA -0.992  0.870  0.868  0.760       
## VariableKitZymoDNA   -0.833  0.731  0.729  0.638  0.826
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.9601839 -1.2519516  0.5402811  0.7307451  1.5677673 
## 
## Residual standard error: 0.8857996 
## Degrees of freedom: 99 total; 93 residual
AIC(m_Listeria.LogCopiespermLofMilk3)
## [1] -126.1634
AIC(mod.Listeria)
## [1] -136.564
# NOTE(review): mod.Listeria and mod3.Listeria are both REML fits with
# different fixed-effect terms; REML-based AIC is generally not used to rank
# models whose fixed effects differ -- confirm with method = "ML" refits.
AIC(mod3.Listeria) #mod.Listeria is best model (including qPCRefficiency and SpikeSet)
## [1] 170.6132
# Alias the selected heteroscedastic model; the emmeans calls and figures use this name.
mod.Listeria.best <- mod.Listeria
#mod.Listeria not assuming homoscedasticity and including qPCRefficiency and SpikeSet is a much better fit than any of the alternatives

Listeria Figure

library(emmeans)
# Kit marginal means and pairwise contrasts from the selected gls model,
# using the "df.error" degrees-of-freedom mode.
mod.Listeria.best_emmeans <- emmeans(
  mod.Listeria.best,
  pairwise ~ VariableKit,
  mode = "df.error"
)

# Compact letter display with sorting and the detailed comparison table.
mod.Listeria.best_cld <- CLD(
  mod.Listeria.best_emmeans$emmeans,
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Listeria.best_cld_letters <- CLD(mod.Listeria.best_emmeans$emmeans, Letters=LETTERS)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Listeria.best_cld_letters
##  VariableKit emmean     SE df lower.CL upper.CL .group
##  PSoilP        4.83 0.0709 90     4.69     4.98  A    
##  Pfood         5.31 0.0289 90     5.25     5.36   B   
##  ZymoDNA       5.38 0.0452 90     5.29     5.47   B   
##  COREDNA       5.86 0.0204 90     5.82     5.90    C  
##  PviralDNA     5.89 0.0354 90     5.82     5.96    CD 
##  Mastitis      5.96 0.0290 90     5.91     6.02     D 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: df.error 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 6 estimates 
## significance level used: alpha = 0.05
# Get fitted values from model to plot with other software
# Kit-level marginal means from the selected gls model as a data frame.
data.frame(
  summary(
    emmeans(mod.Listeria.best, ~VariableKit, mode = "df.error")
  )
)
##   VariableKit   emmean         SE df lower.CL upper.CL
## 1     COREDNA 5.857941 0.02040897 90 5.817395 5.898487
## 2    Mastitis 5.962816 0.02902571 90 5.905151 6.020480
## 3       Pfood 5.305790 0.02889339 90 5.248388 5.363192
## 4      PSoilP 4.834540 0.07089067 90 4.693704 4.975377
## 5   PviralDNA 5.886318 0.03539537 90 5.815999 5.956637
## 6     ZymoDNA 5.378431 0.04523235 90 5.288569 5.468293
# Get summary
summary(emmeans(mod.Listeria.best,~ VariableKit,mode = "df.error"), infer=TRUE)
##  VariableKit emmean     SE df lower.CL upper.CL t.ratio p.value
##  COREDNA       5.86 0.0204 90     5.82     5.90 287.028 <.0001 
##  Mastitis      5.96 0.0290 90     5.91     6.02 205.432 <.0001 
##  Pfood         5.31 0.0289 90     5.25     5.36 183.633 <.0001 
##  PSoilP        4.83 0.0709 90     4.69     4.98  68.197 <.0001 
##  PviralDNA     5.89 0.0354 90     5.82     5.96 166.302 <.0001 
##  ZymoDNA       5.38 0.0452 90     5.29     5.47 118.907 <.0001 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: df.error 
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data
# Raw observations for the overlay plots. The three columns the plots use
# are selected by name instead of by position (formerly c(25, 42, 4)), so the
# subset cannot silently change if columns are added to or reordered in
# Listeria.InnOnly. Column order in the result may differ from the positional
# version; the plots refer to the columns by name.
mod_df1_Listeria.rawdata <- Listeria.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]

# Kit marginal means with confidence limits from the selected gls model.
mod_df2_Listeria.best.model <- emmeans(mod.Listeria.best, ~VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()

ggplot() +
  geom_jitter(data=mod_df1_Listeria.rawdata,aes(x=VariableKit,y=LogCopiespermLofMilk)) +
  geom_point(data=mod_df2_Listeria.best.model,aes(x=VariableKit,y=emmean,fill=VariableKit))+
  geom_errorbar(data=mod_df2_Listeria.best.model,aes(x=VariableKit,ymin=lower.CL,ymax=upper.CL),width=0.5)

# Making the plot pretty

# Kit -> hex colour lookup used by scale_color_manual().
Colors <- c(
  "COREDNA" = "#4DB3C7",
  "EZFood" = "#85CA46",
  "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B",
  "PSoilP" = "#1D6E9B",
  "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

# Raw observations (jittered; colour = kit, shape = spike set) overlaid with
# the gls marginal means, their confidence intervals and the group letters.
# The colour scale is added exactly once: adding scale_color_manual() twice
# makes ggplot2 warn that it is replacing an existing colour scale.
ggplot() +
  geom_jitter(
    data = mod_df1_Listeria.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  geom_errorbar(
    data = mod_df2_Listeria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Listeria.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Listeria.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  # ylim(3.5, 6.5) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Listeria Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )


The model that does not assume homoscedasticity and includes VariableKit + SpikeSet + qPCRefficiency was chosen.
qPCRefficiency is forced into all final models.

Formula: mod.Listeria = nlme::gls(LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Listeria.InnOnly, weights = varIdent(form = ~1 | VariableKit))

AIC(mod.Listeria)
-136.564 # best model

Previously chosen Linear Model that assumed homoscedasticity for reference:
m_Listeria.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Listeria.InnOnly )
AIC(m_Listeria.LogCopiespermLofMilk3)
-126.1634



Manuscript Figures: Listeria


# Listeria: Milk Data and Controls
# Keep every Listeria row except the NP40-treated inoculated milk samples.
# NOTE(review): if Listeria is derived from SampleData, those rows were
# already removed when SampleData was built, making this filter a no-op -- confirm.
Listeria.Inn.Ctrl <- Listeria %>% filter(VariableSampleType!="NP40InoculatedMilk")
dim(Listeria.Inn.Ctrl)
## [1] 240  42
## [1] 240  42
# Kit -> hex colour lookup for the manuscript figures.
# NOTE(review): "Pfood" is "#D2338B" here but "#D2326B" in the other Colors
# definitions in this file -- confirm whether the difference is intentional.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2338B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")


# Fix the facet order of the sample types.
Listeria.Inn.Ctrl$VariableSampleType <- factor(
  Listeria.Inn.Ctrl$VariableSampleType,
  levels = c("InoculatedMilk", "UninoculatedMilk", "NoTemplateControl", "MockCommunity")
)

# One facet per sample type; points coloured by kit, open shapes by spike
# set, dodged (no jitter) within each kit. The y axis is limited to 0-9.
ggplot(
  data = Listeria.Inn.Ctrl,
  aes(VariableKit, LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
) +
  geom_point(
    aes(colour = VariableKit),
    size = 2,
    stroke = .5,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1),
    show.legend = FALSE
  ) +
  facet_wrap(vars(VariableSampleType), nrow = 1) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  ylim(0, 9) +
  ylab("Listeria Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Listeria DNA Copy Numbers - All Samples and Controls") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  )
## Warning: Removed 130 rows containing missing values (geom_point).

ggsave("Listeria-AllSamples.TIFF", width = 9, height = 3,units = "in", dpi = 600)
## Warning: Removed 130 rows containing missing values (geom_point).
# Inoculated-milk Listeria counts per kit; open shapes distinguish spike
# sets and points are dodged (no jitter) within each kit.
ggplot(
  Listeria.InnOnly,
  aes(VariableKit, LogCopiespermLofMilk, shape = factor(SpikeSet))
) +
  geom_point(
    aes(colour = VariableKit),
    size = 2,
    stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1)
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Listeria DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )

# Plot overlaying model estimates to raw data
# Raw observations for the manuscript overlay plots. Columns are selected by
# name instead of by position (formerly c(25, 42, 4)), so the subset cannot
# silently change if columns are added to or reordered in Listeria.InnOnly.
# Column order in the result may differ from the positional version; the
# plots refer to the columns by name.
mod_df1_Listeria.rawdata <- Listeria.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]

# Kit marginal means with confidence limits from the selected gls model.
mod_df2_Listeria.best.model <- emmeans(mod.Listeria.best, ~VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()

# Manuscript figure: jittered raw data (open shapes by spike set, colour by
# kit) with the gls marginal means, confidence intervals and group letters.
ggplot() +
  geom_jitter(
    data = mod_df1_Listeria.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2, stroke = 1, width = .2
  ) +
  geom_errorbar(
    data = mod_df2_Listeria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Listeria.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Listeria.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = .3, nudge_x = -0.05, fontface = "bold"
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  # ylim(3.5, 6.5) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Listeria DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  )

# Write the plot above to a 600-dpi TIFF.
ggsave("Listeria-Model-Jitter.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)


# Manuscript figure: raw data dodged by spike set (no jitter; open shapes,
# colour by kit) with the gls marginal means, confidence intervals and
# group letters.
ggplot() +
  geom_point(
    data = mod_df1_Listeria.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2, stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = .5)
  ) +
  geom_errorbar(
    data = mod_df2_Listeria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Listeria.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Listeria.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = .3, nudge_x = -0.05, fontface = "bold"
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  # ylim(3.5, 6.5) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Listeria DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  )

# Write the plot above to a 600-dpi TIFF.
ggsave("Listeria-Model.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)

Mycobacterium


Mycobacterium smegmatis Copy Numbers

#Data File: CleanDNAprepData1.18.19
library(ggplot2)
library(dplyr)
library(emmeans)
library(multcompView)

#Filter Subset from Sample Data
Mycobacterium <- SampleData %>% filter(Assay=="Mycobacterium smegmatis")
dim(Mycobacterium)
## [1] 240  42
#Summary Statistics

# Mean, SD, missing count and total count of log copies for every
# kit x sample-type combination, returned as a plain data frame.
Mycobacterium.summary <- Mycobacterium %>%
  group_by(VariableKit, VariableSampleType) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableKit' (override with `.groups` argument)
write.table (Mycobacterium.summary, "Mycobacterium.summary.txt", sep="\t" )
#Plot Raw Means and Standard Deviations
# Kit -> hex colour lookup used by scale_color_manual().
Colors <- c(
  "COREDNA" = "#4DB3C7",
  "EZFood" = "#85CA46",
  "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B",
  "PSoilP" = "#1D6E9B",
  "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# Boxplots of log copies by sample type, one box per kit (colour).
# Only real aesthetics are mapped: the former `z = VariableKit` and
# `ylab = "Copy Numbers"` entries in aes() were not used by any layer (the
# axis title is set by ylab() below), so they have been dropped.
ggplot(
  data = Mycobacterium,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = VariableKit)
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_boxplot(lwd = 1) +
  ggtitle("Mycobacterium Copy Numbers") +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl")
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 101 rows containing non-finite values (stat_boxplot).

# Jittered raw Mycobacterium counts by sample type; colour = kit,
# shape = spike set.
ggplot(
  data = Mycobacterium,
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    color = VariableKit,
    shape = SpikeSet
  )
) +
  geom_jitter(width = 0.25) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl")
  ) +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Mycobacterium Copy Numbers") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 101 rows containing missing values (geom_point).

# Jittered raw Mycobacterium counts by sample type; colour = spike set,
# shape = kit. There are seven kits, one more than ggplot2's default shape
# palette provides, so the shapes are listed explicitly; with the default
# palette the seventh kit receives no shape and its points are not drawn.
ggplot(
  data = Mycobacterium,
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    color = SpikeSet,
    shape = VariableKit
  )
) +
  geom_jitter(width = 0.35) +
  scale_shape_manual(values = c(16, 17, 15, 3, 7, 8, 18)) +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Mycobacterium Copy Numbers") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 123 rows containing missing values (geom_point).

# Inoculated Milk Data
# Inoculated-milk rows whose log copy number exceeds 0.001.
Mycobacterium.InnOnly <- Mycobacterium %>%
  filter(
    VariableSampleType == "InoculatedMilk",
    LogCopiespermLofMilk > 0.001
  )
# Per sample type x spike set x kit: mean, SD, missing count and row count.
Mycobacterium.InnOnly %>%
  group_by(VariableSampleType, VariableSpikeSet, VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableSampleType', 'VariableSpikeSet' (override with `.groups` argument)
##    VariableSampleType VariableSpikeSet VariableKit mean_LogCopiespermLofMilk
## 1      InoculatedMilk            First     COREDNA                  6.410321
## 2      InoculatedMilk            First      EZFood                  3.387637
## 3      InoculatedMilk            First    Mastitis                  6.132651
## 4      InoculatedMilk            First       Pfood                  5.835527
## 5      InoculatedMilk            First      PSoilP                  4.337075
## 6      InoculatedMilk            First   PviralDNA                  5.791533
## 7      InoculatedMilk            First     ZymoDNA                  5.517488
## 8      InoculatedMilk           Second     COREDNA                  7.101812
## 9      InoculatedMilk           Second    Mastitis                  7.079584
## 10     InoculatedMilk           Second       Pfood                  6.579458
## 11     InoculatedMilk           Second      PSoilP                  6.072664
## 12     InoculatedMilk           Second   PviralDNA                  6.680003
## 13     InoculatedMilk           Second     ZymoDNA                  6.701950
## 14     InoculatedMilk            Third     COREDNA                  6.917894
## 15     InoculatedMilk            Third      EZFood                  6.308042
## 16     InoculatedMilk            Third    Mastitis                  6.875300
## 17     InoculatedMilk            Third       Pfood                  6.465286
## 18     InoculatedMilk            Third      PSoilP                  5.904056
## 19     InoculatedMilk            Third   PviralDNA                  6.266574
## 20     InoculatedMilk            Third     ZymoDNA                  6.695051
##        st_dev n_missing n_total
## 1  0.03379999         0       6
## 2  0.53911986         0       4
## 3  0.03721685         0       6
## 4  0.02840872         0       6
## 5  0.18166318         0       6
## 6  0.06735908         0       6
## 7  0.08926898         0       6
## 8  0.07093623         0       6
## 9  0.02215197         0       6
## 10 0.04582864         0       6
## 11 0.17450050         0       6
## 12 0.03305596         0       6
## 13 0.04422551         0       6
## 14 0.08192923         0       6
## 15         NA         0       1
## 16 0.03405437         0       6
## 17 0.02810963         0       6
## 18 0.13596609         0       6
## 19 0.08559028         0       6
## 20 0.19000457         0       6

Model Selection

Linear Models

#3 linear models were compared: including SpikeSet only, qPCRefficiency only, and both as covariates. Best model fit was used as the final model. 
# Candidate 1: kit effect adjusted for spike set only.
m_Mycobacterium.LogCopiespermLofMilk1 <- lm(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = Mycobacterium.InnOnly
)
summary(m_Mycobacterium.LogCopiespermLofMilk1)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Mycobacterium.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.92497 -0.13287 -0.02344  0.15372  1.58590 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           6.13871    0.07752  79.185  < 2e-16 ***
## VariableKitEZFood    -2.35460    0.14721 -15.995  < 2e-16 ***
## VariableKitMastitis  -0.11416    0.09533  -1.198    0.234    
## VariableKitPfood     -0.51659    0.09533  -5.419 3.90e-07 ***
## VariableKitPSoilP    -1.37208    0.09533 -14.393  < 2e-16 ***
## VariableKitPviralDNA -0.56397    0.09533  -5.916 4.26e-08 ***
## VariableKitZymoDNA   -0.50518    0.09533  -5.299 6.55e-07 ***
## SpikeSetSecond        1.07587    0.06705  16.046  < 2e-16 ***
## SpikeSetThird         0.93803    0.06596  14.222  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.286 on 104 degrees of freedom
## Multiple R-squared:  0.8946, Adjusted R-squared:  0.8865 
## F-statistic: 110.3 on 8 and 104 DF,  p-value: < 2.2e-16
# Candidate 2: kit effect adjusted for qPCR efficiency only.
m_Mycobacterium.LogCopiespermLofMilk2 <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
  data = Mycobacterium.InnOnly
)
summary(m_Mycobacterium.LogCopiespermLofMilk2)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency, 
##     data = Mycobacterium.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.86642 -0.22173  0.00509  0.19529  1.35168 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            8.5221     0.1298  65.677  < 2e-16 ***
## VariableKitEZFood     -1.9474     0.1653 -11.782  < 2e-16 ***
## VariableKitMastitis    0.2337     0.1048   2.229   0.0279 *  
## VariableKitPfood      -0.1687     0.1048  -1.609   0.1106    
## VariableKitPSoilP     -1.0242     0.1048  -9.769  < 2e-16 ***
## VariableKitPviralDNA  -0.5640     0.1025  -5.500 2.69e-07 ***
## VariableKitZymoDNA    -0.1573     0.1048  -1.500   0.1365    
## qPCRefficiency        -3.1148     0.1958 -15.910  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3076 on 105 degrees of freedom
## Multiple R-squared:  0.8769, Adjusted R-squared:  0.8686 
## F-statistic: 106.8 on 7 and 105 DF,  p-value: < 2.2e-16
# Candidate 3: kit effect adjusted for both qPCR efficiency and spike set.
m_Mycobacterium.LogCopiespermLofMilk3 <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Mycobacterium.InnOnly
)
summary(m_Mycobacterium.LogCopiespermLofMilk3)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = Mycobacterium.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.89045 -0.11670 -0.02229  0.10545  1.44781 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           7.31862    0.25291  28.938  < 2e-16 ***
## VariableKitEZFood    -2.09249    0.14393 -14.538  < 2e-16 ***
## VariableKitMastitis   0.06611    0.09404   0.703 0.483636    
## VariableKitPfood     -0.33631    0.09404  -3.576 0.000533 ***
## VariableKitPSoilP    -1.19180    0.09404 -12.673  < 2e-16 ***
## VariableKitPviralDNA -0.56397    0.08641  -6.527 2.57e-09 ***
## VariableKitZymoDNA   -0.32490    0.09404  -3.455 0.000801 ***
## qPCRefficiency       -1.61421    0.33237  -4.857 4.28e-06 ***
## SpikeSetSecond        0.66315    0.10447   6.348 5.97e-09 ***
## SpikeSetThird         0.47280    0.11292   4.187 5.97e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2592 on 103 degrees of freedom
## Multiple R-squared:  0.9142, Adjusted R-squared:  0.9067 
## F-statistic:   122 on 9 and 103 DF,  p-value: < 2.2e-16
# Fit of model with both qPCRefficiency and SpikeSet is better than fit of model with SpikeSet only
# NOTE(review): anova() tests the models sequentially in the order given.
# Models 1 and 2 each contain a term the other lacks (SpikeSet vs
# qPCRefficiency), so they are not nested and the row-2 test (Df = -1) is not
# a valid nested F test. The claim above would be tested directly by
# anova(model 1, model 3); row 3 of this table compares model 3 with model 2,
# not with model 1 -- confirm before citing these p-values.
anova(m_Mycobacterium.LogCopiespermLofMilk1, m_Mycobacterium.LogCopiespermLofMilk2, m_Mycobacterium.LogCopiespermLofMilk3)
## Analysis of Variance Table
## 
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    104 8.5058                                  
## 2    105 9.9352 -1   -1.4294 21.273 1.149e-05 ***
## 3    103 6.9209  2    3.0143 22.430 8.199e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(m_Mycobacterium.LogCopiespermLofMilk1)
## [1] 48.38973
AIC(m_Mycobacterium.LogCopiespermLofMilk2)
## [1] 63.94316
AIC(m_Mycobacterium.LogCopiespermLofMilk3)
## [1] 27.08939
# Final model chosen:
# Model 3:  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
#m_Mycobacterium.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Mycobacterium.InnOnly )

Mixed effects models - for reference

library(lme4)
library(lmerTest)

# Reference mixed model: kit as the fixed effect, random intercept per spike set.
# NOTE(review): SpikeSet has only 3 levels, so the random-intercept variance is
# estimated from very few groups -- treat it with caution.
model1 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet),
  data = Mycobacterium.InnOnly,
  REML = TRUE
)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
##    Data: Mycobacterium.InnOnly
## 
## REML criterion at convergence: 64.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2385 -0.4552 -0.0726  0.5291  5.5621 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  SpikeSet (Intercept) 0.34054  0.5836  
##  Residual             0.08179  0.2860  
## Number of obs: 113, groups:  SpikeSet, 3
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            6.81001    0.34359   2.13547  19.820  0.00185 ** 
## VariableKitEZFood     -2.35772    0.14719 104.04783 -16.018  < 2e-16 ***
## VariableKitMastitis   -0.11416    0.09533 104.00016  -1.198  0.23380    
## VariableKitPfood      -0.51659    0.09533 104.00016  -5.419 3.90e-07 ***
## VariableKitPSoilP     -1.37208    0.09533 104.00016 -14.393  < 2e-16 ***
## VariableKitPviralDNA  -0.56397    0.09533 104.00016  -5.916 4.26e-08 ***
## VariableKitZymoDNA    -0.50518    0.09533 104.00016  -5.299 6.55e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VarblKtEZFd -0.090                                   
## VrblKtMstts -0.139  0.324                            
## VariblKtPfd -0.139  0.324  0.500                     
## VarblKtPSlP -0.139  0.324  0.500  0.500              
## VrblKtPvDNA -0.139  0.324  0.500  0.500  0.500       
## VrblKtZyDNA -0.139  0.324  0.500  0.500  0.500  0.500
# Reference mixed model with qPCR efficiency added as a fixed-effect covariate;
# spike set stays a random intercept.
model2 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet),
  data = Mycobacterium.InnOnly,
  REML = TRUE
)
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
##    Data: Mycobacterium.InnOnly
## 
## REML criterion at convergence: 41.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4323 -0.4457 -0.1126  0.4201  5.5752 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  SpikeSet (Intercept) 0.10805  0.3287  
##  Residual             0.06722  0.2593  
## Number of obs: 113, groups:  SpikeSet, 3
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            7.74985    0.26748   6.39677  28.973 4.99e-08 ***
## VariableKitEZFood     -2.08149    0.14368 103.65629 -14.487  < 2e-16 ***
## VariableKitMastitis    0.07680    0.09371 104.20386   0.820 0.414355    
## VariableKitPfood      -0.32563    0.09371 104.20386  -3.475 0.000746 ***
## VariableKitPSoilP     -1.18112    0.09371 104.20386 -12.604  < 2e-16 ***
## VariableKitPviralDNA  -0.56397    0.08642 102.92851  -6.526 2.59e-09 ***
## VariableKitZymoDNA    -0.31422    0.09371 104.20386  -3.353 0.001114 ** 
## qPCRefficiency        -1.70987    0.32440 100.04919  -5.271 7.84e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA
## VarblKtEZFd  0.150                                          
## VrblKtMstts  0.109  0.421                                   
## VariblKtPfd  0.109  0.421  0.575                            
## VarblKtPSlP  0.109  0.421  0.575  0.575                     
## VrblKtPvDNA -0.162  0.301  0.461  0.461  0.461              
## VrblKtZyDNA  0.109  0.421  0.575  0.575  0.575  0.461       
## qPCReffcncy -0.667 -0.371 -0.387 -0.387 -0.387  0.000 -0.387
# Likelihood-ratio comparison of the two mixed models; as the message printed
# with the result shows, anova() refits both with ML instead of REML first.
anova(model1, model2)
## refitting model(s) with ML (instead of REML)
## Data: Mycobacterium.InnOnly
## Models:
## model1: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## model2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
##        npar    AIC   BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## model1    9 63.564 88.11 -22.782   45.564                         
## model2   10 39.466 66.74  -9.733   19.466 26.098  1  3.246e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# NOTE(review): these AICs are taken from the REML fits (REML=TRUE above), which
# is why they differ from the ML-based AIC column of the anova() table. The two
# models have different fixed effects, so presumably the ML-based values are the
# ones to compare -- confirm.
AIC (model1)
## [1] 82.45669
AIC (model2)
## [1] 61.19095

Final Model

# Final linear model: kit effect adjusted for qPCR efficiency and spike set
# (same formula as model 3 chosen above).
m_Mycobacterium.LogCopiespermLofMilk <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Mycobacterium.InnOnly
)
summary(m_Mycobacterium.LogCopiespermLofMilk)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = Mycobacterium.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.89045 -0.11670 -0.02229  0.10545  1.44781 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           7.31862    0.25291  28.938  < 2e-16 ***
## VariableKitEZFood    -2.09249    0.14393 -14.538  < 2e-16 ***
## VariableKitMastitis   0.06611    0.09404   0.703 0.483636    
## VariableKitPfood     -0.33631    0.09404  -3.576 0.000533 ***
## VariableKitPSoilP    -1.19180    0.09404 -12.673  < 2e-16 ***
## VariableKitPviralDNA -0.56397    0.08641  -6.527 2.57e-09 ***
## VariableKitZymoDNA   -0.32490    0.09404  -3.455 0.000801 ***
## qPCRefficiency       -1.61421    0.33237  -4.857 4.28e-06 ***
## SpikeSetSecond        0.66315    0.10447   6.348 5.97e-09 ***
## SpikeSetThird         0.47280    0.11292   4.187 5.97e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2592 on 103 degrees of freedom
## Multiple R-squared:  0.9142, Adjusted R-squared:  0.9067 
## F-statistic:   122 on 9 and 103 DF,  p-value: < 2.2e-16
# Residuals vs predicted values for the final linear model (base graphics)
plot(
  x = predict(m_Mycobacterium.LogCopiespermLofMilk),
  y = resid(m_Mycobacterium.LogCopiespermLofMilk)
)

# Same diagnostic using ggplot2, with points coloured by kit
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

ggplot(
  m_Mycobacterium.LogCopiespermLofMilk,
  aes(
    x = predict(m_Mycobacterium.LogCopiespermLofMilk),
    y = resid(m_Mycobacterium.LogCopiespermLofMilk),
    color = VariableKit
  )
) +
  geom_point() +
  theme_bw() +
  # Title spelling corrected ("Inoculated"), matching the other figure titles
  ggtitle("Mycobacterium Inoculated Only - Model Fit - Residuals vs Predicted") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  # Reference lines at +/-1, the same threshold used to flag large residuals
  geom_hline(yintercept = 1) +
  geom_hline(yintercept = -1)

# Normal Q-Q plot of the residuals with its reference line
qqnorm(resid(m_Mycobacterium.LogCopiespermLofMilk))
qqline(resid(m_Mycobacterium.LogCopiespermLofMilk))

# Re-print the summary of the final linear model next to its diagnostics
summary(m_Mycobacterium.LogCopiespermLofMilk)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = Mycobacterium.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.89045 -0.11670 -0.02229  0.10545  1.44781 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           7.31862    0.25291  28.938  < 2e-16 ***
## VariableKitEZFood    -2.09249    0.14393 -14.538  < 2e-16 ***
## VariableKitMastitis   0.06611    0.09404   0.703 0.483636    
## VariableKitPfood     -0.33631    0.09404  -3.576 0.000533 ***
## VariableKitPSoilP    -1.19180    0.09404 -12.673  < 2e-16 ***
## VariableKitPviralDNA -0.56397    0.08641  -6.527 2.57e-09 ***
## VariableKitZymoDNA   -0.32490    0.09404  -3.455 0.000801 ***
## qPCRefficiency       -1.61421    0.33237  -4.857 4.28e-06 ***
## SpikeSetSecond        0.66315    0.10447   6.348 5.97e-09 ***
## SpikeSetThird         0.47280    0.11292   4.187 5.97e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2592 on 103 degrees of freedom
## Multiple R-squared:  0.9142, Adjusted R-squared:  0.9067 
## F-statistic:   122 on 9 and 103 DF,  p-value: < 2.2e-16
# Only 1 large residual (absolute value above 1) was identified, and it belonged to EZFood

# Store each observation's residual from the final linear model
Mycobacterium.InnOnly[["resid"]] <- residuals(m_Mycobacterium.LogCopiespermLofMilk)

# Count, per kit, the observations whose residual exceeds 1 in absolute value
Mycobacterium.InnOnly %>%
  select(VariableKit, resid) %>%
  filter(abs(resid) > 1) %>%
  group_by(VariableKit) %>%
  summarize(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 1 x 2
##   VariableKit     n
##   <chr>       <int>
## 1 EZFood          1
# Estimated marginal means per kit plus all pairwise kit contrasts
# (Tukey-adjusted) from the final linear model
m_Mycobacterium.LogCopiespermLofMilk_emmeans <- emmeans(
  m_Mycobacterium.LogCopiespermLofMilk,
  pairwise ~ VariableKit
)
# Compact letter display: kits sharing a letter are not significantly different.
# NOTE(review): CLD() is flagged as deprecated by emmeans (warning below).
m_Mycobacterium.LogCopiespermLofMilk_cld <- CLD(
  m_Mycobacterium.LogCopiespermLofMilk_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
m_Mycobacterium.LogCopiespermLofMilk_cld
##  VariableKit emmean     SE  df lower.CL upper.CL .group
##  EZFood        4.58 0.1215 103     4.34     4.82  A    
##  PSoilP        5.48 0.0618 103     5.36     5.61   B   
##  PviralDNA     6.11 0.0671 103     5.98     6.24    C  
##  Pfood         6.34 0.0618 103     6.22     6.46    C  
##  ZymoDNA       6.35 0.0618 103     6.23     6.47    C  
##  COREDNA       6.67 0.0671 103     6.54     6.81     D 
##  Mastitis      6.74 0.0618 103     6.62     6.86     D 
## 
## Results are averaged over the levels of: SpikeSet 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05
# Kit-level estimated marginal means as a plain data frame, so the fitted
# values can be plotted with other software
data.frame(
  summary(
    emmeans(m_Mycobacterium.LogCopiespermLofMilk, ~ VariableKit)
  )
)
##   VariableKit   emmean         SE  df lower.CL upper.CL
## 1     COREDNA 6.674713 0.06714940 103 6.541538 6.807888
## 2      EZFood 4.582219 0.12146273 103 4.341326 4.823111
## 3    Mastitis 6.740826 0.06179603 103 6.618268 6.863383
## 4       Pfood 6.338404 0.06179603 103 6.215846 6.460962
## 5      PSoilP 5.482912 0.06179603 103 5.360354 5.605470
## 6   PviralDNA 6.110741 0.06714940 103 5.977566 6.243916
## 7     ZymoDNA 6.349810 0.06179603 103 6.227252 6.472368
# Plot the model-based kit means with their confidence limits and CLD letters
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

ggplot(
  data = data.frame(summary(emmeans(m_Mycobacterium.LogCopiespermLofMilk, ~ VariableKit))),
  mapping = aes(x = VariableKit, y = emmean, color = VariableKit)
) +
  geom_point() +
  labs(y = "Estimated Marginal Means") +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.5) +
  # Letter groups are read from the compact letter display of the same model
  geom_text(
    data = data.frame(m_Mycobacterium.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`),
    hjust = -0.1
  ) +
  theme_bw() +
  ggtitle("Mycobacterium Copy Numbers - Inoculated Milk Only") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  # Fixed left-to-right kit order on the x axis
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  )

Mycobacterium - Models not assuming homoscedasticity

# from https://cran.r-project.org/web/packages/emmeans/vignettes/FAQs.html#contents

library(nlme)
# Final model chosen:
# Model 3:  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
# m_Mycobacterium.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Mycobacterium.InnOnly )

# Same fixed effects as the final linear model, fitted by generalized least
# squares with a separate residual standard deviation for each kit (varIdent).
mod.Mycobacterium <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Mycobacterium.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod.Mycobacterium)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet 
##   Data: Mycobacterium.InnOnly 
##       AIC       BIC  logLik
##   -80.071 -35.28061 57.0355
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##    COREDNA     EZFood   Mastitis      Pfood     PSoilP  PviralDNA    ZymoDNA 
##  1.0000000 16.7764658  0.4895085  1.3746125  6.5788124  1.7605999  3.3967428 
## 
## Coefficients:
##                          Value Std.Error   t-value p-value
## (Intercept)           6.774266 0.0734689  92.20592  0.0000
## VariableKitEZFood    -2.354573 0.4844962  -4.85984  0.0000
## VariableKitMastitis  -0.041469 0.0199514  -2.07852  0.0401
## VariableKitPfood     -0.443891 0.0279206 -15.89836  0.0000
## VariableKitPSoilP    -1.299383 0.1017353 -12.77220  0.0000
## VariableKitPviralDNA -0.563972 0.0307886 -18.31757  0.0000
## VariableKitZymoDNA   -0.432485 0.0548675  -7.88234  0.0000
## qPCRefficiency       -0.650915 0.0945204  -6.88650  0.0000
## SpikeSetSecond        0.700651 0.0314932  22.24770  0.0000
## SpikeSetThird         0.479920 0.0340255  14.10469  0.0000
## 
##  Correlation: 
##                      (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA qPCRff
## VariableKitEZFood     0.020                                                 
## VariableKitMastitis   0.356  0.039                                          
## VariableKitPfood      0.254  0.028  0.615                                   
## VariableKitPSoilP     0.070  0.008  0.169  0.121                            
## VariableKitPviralDNA -0.102  0.016  0.376  0.269  0.074                     
## VariableKitZymoDNA    0.129  0.014  0.313  0.224  0.061  0.137              
## qPCRefficiency       -0.971 -0.028 -0.529 -0.378 -0.104  0.000 -0.192       
## SpikeSetSecond       -0.902 -0.019 -0.465 -0.332 -0.091  0.000 -0.169  0.879
## SpikeSetThird        -0.916 -0.021 -0.475 -0.339 -0.093  0.000 -0.173  0.897
##                      SpkStS
## VariableKitEZFood          
## VariableKitMastitis        
## VariableKitPfood           
## VariableKitPSoilP          
## VariableKitPviralDNA       
## VariableKitZymoDNA         
## qPCRefficiency             
## SpikeSetSecond             
## SpikeSetThird         0.894
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.06631319 -0.60733948 -0.01670647  0.70681273  2.59918745 
## 
## Residual standard error: 0.06451336 
## Degrees of freedom: 113 total; 103 residual
# Compare the homoscedastic linear model with the per-kit-variance gls fit.
# NOTE(review): the gls model was fitted by REML (see its summary), whereas
# AIC() on an lm object is based on the ordinary (ML) likelihood, so these two
# values may not be directly comparable -- consider refitting the gls model
# with method = "ML" before comparing.
AIC(m_Mycobacterium.LogCopiespermLofMilk3)
## [1] 27.08939
AIC(mod.Mycobacterium)
## [1] -80.071
# Testing a simpler model: kit effect only, still with one residual standard
# deviation per kit

mod3.Mycobacterium <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit,
  data = Mycobacterium.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod3.Mycobacterium)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit 
##   Data: Mycobacterium.InnOnly 
##        AIC      BIC    logLik
##   185.8061 223.0942 -78.90304
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##   COREDNA    EZFood  Mastitis     Pfood    PSoilP PviralDNA   ZymoDNA 
##  1.000000  4.515682  1.366589  1.101159  2.666382  1.232636  1.903249 
## 
## Coefficients:
##                          Value Std.Error  t-value p-value
## (Intercept)           6.810009 0.0723945 94.06805  0.0000
## VariableKitEZFood    -2.838291 0.6244796 -4.54505  0.0000
## VariableKitMastitis  -0.114164 0.1225920 -0.93125  0.3538
## VariableKitPfood     -0.516586 0.1076842 -4.79722  0.0000
## VariableKitPSoilP    -1.372077 0.2061603 -6.65539  0.0000
## VariableKitPviralDNA -0.563972 0.1149088 -4.90800  0.0000
## VariableKitZymoDNA   -0.505179 0.1556457 -3.24570  0.0016
## 
##  Correlation: 
##                      (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VariableKitEZFood    -0.116                                   
## VariableKitMastitis  -0.591  0.068                            
## VariableKitPfood     -0.672  0.078  0.397                     
## VariableKitPSoilP    -0.351  0.041  0.207  0.236              
## VariableKitPviralDNA -0.630  0.073  0.372  0.424  0.221       
## VariableKitZymoDNA   -0.465  0.054  0.275  0.313  0.163  0.293
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.6471390 -1.2065620  0.4419297  0.7611800  1.6844881 
## 
## Residual standard error: 0.3071438 
## Degrees of freedom: 113 total; 106 residual
# AIC of the three candidates: homoscedastic lm, full gls, kit-only gls.
# NOTE(review): both gls fits use REML and differ in their fixed effects, and
# the lm AIC is ML-based, so these values may not be strictly comparable --
# consider confirming the ranking with method = "ML" refits.
AIC(m_Mycobacterium.LogCopiespermLofMilk3)
## [1] 27.08939
AIC(mod.Mycobacterium)
## [1] -80.071
AIC(mod3.Mycobacterium) #mod.Mycobacterium is best model (including qPCRefficiency and SpikeSet)
## [1] 185.8061
#mod.Mycobacterium not assuming homoscedasticity and including qPCRefficiency and SpikeSet is a much better fit than any of the alternatives

# Alias the chosen model; the figure code further down refers to it by this name
mod.Mycobacterium.best <- mod.Mycobacterium

Final Mycobacterium Figure

# Estimated marginal means per kit and Tukey-adjusted pairwise contrasts from
# the gls model (Satterthwaite degrees of freedom, as reported in the output)
mod.Mycobacterium_emmeans <- emmeans(
  mod.Mycobacterium,
  pairwise ~ VariableKit
)
# Compact letter display, sorted, including the table of pairwise contrasts
mod.Mycobacterium_cld <- CLD(
  mod.Mycobacterium_emmeans$emmeans,
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Letters-only version, used later as the text labels on the figure
mod.Mycobacterium_cld_letters <- CLD(
  mod.Mycobacterium_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Mycobacterium_cld
## $emmeans
##  VariableKit emmean       SE    df lower.CL upper.CL .group
##  EZFood       4.401 0.484098  4.00    3.057    5.745  ABCD 
##  PSoilP       5.456 0.100072 16.83    5.245    5.667  A    
##  PviralDNA    6.191 0.027919 17.20    6.133    6.250   B   
##  Pfood        6.312 0.021068 13.53    6.266    6.357    C  
##  ZymoDNA      6.323 0.051718 16.57    6.214    6.432   BC  
##  Mastitis     6.714 0.007896 17.89    6.697    6.731     D 
##  COREDNA      6.755 0.017146 19.41    6.720    6.791     D 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: satterthwaite 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05 
## 
## $comparisons
##  contrast             estimate     SE    df t.ratio p.value
##  PSoilP - EZFood        1.0552 0.4943  4.35  2.135  0.4591 
##  PviralDNA - EZFood     1.7906 0.4850  4.03  3.692  0.1274 
##  PviralDNA - PSoilP     0.7354 0.1041 19.57  7.065  <.0001 
##  Pfood - EZFood         1.9107 0.4845  4.01  3.943  0.1052 
##  Pfood - PSoilP         0.8555 0.1022 18.34  8.371  <.0001 
##  Pfood - PviralDNA      0.1201 0.0356 33.31  3.376  0.0281 
##  ZymoDNA - EZFood       1.9221 0.4868  4.09  3.948  0.1025 
##  ZymoDNA - PSoilP       0.8669 0.1126 24.93  7.700  <.0001 
##  ZymoDNA - PviralDNA    0.1315 0.0591 26.27  2.224  0.3172 
##  ZymoDNA - Pfood        0.0114 0.0557 22.02  0.205  1.0000 
##  Mastitis - EZFood      2.3131 0.4841  4.00  4.778  0.0576 
##  Mastitis - PSoilP      1.2579 0.1003 16.99 12.540  <.0001 
##  Mastitis - PviralDNA   0.5225 0.0297 21.22 17.578  <.0001 
##  Mastitis - Pfood       0.4024 0.0222 16.77 18.137  <.0001 
##  Mastitis - ZymoDNA     0.3910 0.0522 17.15  7.493  <.0001 
##  COREDNA - EZFood       2.3546 0.4845  4.01  4.860  0.0541 
##  COREDNA - PSoilP       1.2994 0.1017 17.95 12.772  <.0001 
##  COREDNA - PviralDNA    0.5640 0.0308 24.37 18.318  <.0001 
##  COREDNA - Pfood        0.4439 0.0279 30.10 15.898  <.0001 
##  COREDNA - ZymoDNA      0.4325 0.0549 20.68  7.882  <.0001 
##  COREDNA - Mastitis     0.0415 0.0200 26.89  2.079  0.3928 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: satterthwaite 
## P value adjustment: tukey method for comparing a family of 7 estimates
# Kit-level estimated marginal means from the gls model as a plain data frame,
# so the fitted values can be plotted with other software
data.frame(
  summary(
    emmeans(mod.Mycobacterium, ~ VariableKit)
  )
)
##   VariableKit   emmean          SE        df lower.CL upper.CL
## 1     COREDNA 6.755452 0.017145930 19.411487 6.719617 6.791288
## 2      EZFood 4.400879 0.484097497  4.000816 3.056917 5.744841
## 3    Mastitis 6.713983 0.007895688 17.887142 6.697387 6.730579
## 4       Pfood 6.311561 0.021067570 13.528588 6.266228 6.356895
## 5      PSoilP 5.456069 0.100071724 16.827130 5.244771 5.667368
## 6   PviralDNA 6.191480 0.027919167 17.197002 6.132627 6.250333
## 7     ZymoDNA 6.322968 0.051717793 16.569065 6.213636 6.432299
# Kit means with both confidence limits and t-tests (infer = TRUE)
emmeans(mod.Mycobacterium, ~ VariableKit) %>%
  summary(infer = TRUE)
##  VariableKit emmean       SE    df lower.CL upper.CL t.ratio p.value
##  COREDNA      6.755 0.017146 19.41    6.720    6.791 393.997 <.0001 
##  EZFood       4.401 0.484098  4.00    3.057    5.745   9.091 0.0008 
##  Mastitis     6.714 0.007896 17.89    6.697    6.731 850.336 <.0001 
##  Pfood        6.312 0.021068 13.53    6.266    6.357 299.587 <.0001 
##  PSoilP       5.456 0.100072 16.83    5.245    5.667  54.522 <.0001 
##  PviralDNA    6.191 0.027919 17.20    6.133    6.250 221.764 <.0001 
##  ZymoDNA      6.323 0.051718 16.57    6.214    6.432 122.259 <.0001 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: satterthwaite 
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data.
# The raw-data columns are selected by name rather than by position (previously
# c(25,42,4)), so the subset cannot silently change if columns are added or
# reordered; these are the three columns the overlay plots map to aesthetics.
mod.Mycobacterium_df1_Mycobacterium.rawdata <- Mycobacterium.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]

# Model-based kit means and confidence limits from the gls model
mod.Mycobacterium_df2_Mycobacterium.model <- emmeans(mod.Mycobacterium, ~ VariableKit) %>%
  summary() %>%
  data.frame()

# Quick look: jittered raw values with the model means and intervals on top
ggplot() +
  geom_jitter(
    data = mod.Mycobacterium_df1_Mycobacterium.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod.Mycobacterium_df2_Mycobacterium.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod.Mycobacterium_df2_Mycobacterium.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )

# Making the plot pretty

Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

# Raw values (colour = kit, shape = spike set) with the gls kit means, their
# confidence limits and the CLD letters drawn on top.
# The colour scale is added once only; adding it twice made ggplot2 warn that
# the scale for 'colour' was already present and was being replaced.
ggplot() +
  geom_jitter(
    data = mod.Mycobacterium_df1_Mycobacterium.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod.Mycobacterium_df2_Mycobacterium.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  ) +
  geom_point(
    data = mod.Mycobacterium_df2_Mycobacterium.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  # Letters are placed slightly above and to the left of each kit mean
  geom_text(
    data = data.frame(mod.Mycobacterium_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  ylim(2.5, 8.0) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  theme_bw() +
  ggtitle("Mycobacterium Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  )

Mycobacterium Figure

library(emmeans)
# Estimated marginal means per kit and Tukey-adjusted pairwise contrasts from
# the chosen gls model, using the error degrees of freedom.
# NOTE(review): mode = "df.error" replaces the Satterthwaite df shown earlier
# (about 4 for EZFood) with a common df of 96; this narrows the EZFood interval
# and changes its letter group from ABCD to A -- confirm this is intended.
mod.Mycobacterium.best_emmeans <- emmeans(
  mod.Mycobacterium.best,
  pairwise ~ VariableKit,
  mode = "df.error"
)

# Compact letter display, sorted, including the table of pairwise contrasts
mod.Mycobacterium.best_cld <- CLD(
  mod.Mycobacterium.best_emmeans$emmeans,
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Letters-only version, used as the text labels on the figures
mod.Mycobacterium.best_cld_letters <- CLD(
  mod.Mycobacterium.best_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Mycobacterium.best_cld_letters
##  VariableKit emmean       SE df lower.CL upper.CL .group
##  EZFood       4.401 0.484098 96    3.440    5.362  A    
##  PSoilP       5.456 0.100072 96    5.257    5.655  A    
##  PviralDNA    6.191 0.027919 96    6.136    6.247   B   
##  Pfood        6.312 0.021068 96    6.270    6.353    C  
##  ZymoDNA      6.323 0.051718 96    6.220    6.426   BC  
##  Mastitis     6.714 0.007896 96    6.698    6.730     D 
##  COREDNA      6.755 0.017146 96    6.721    6.789     D 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: df.error 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05
# Kit-level estimated marginal means (error df) from the chosen model as a
# plain data frame, so the fitted values can be plotted with other software
data.frame(
  summary(
    emmeans(mod.Mycobacterium.best, ~ VariableKit, mode = "df.error")
  )
)
##   VariableKit   emmean          SE df lower.CL upper.CL
## 1     COREDNA 6.755452 0.017145930 96 6.721418 6.789487
## 2      EZFood 4.400879 0.484097497 96 3.439953 5.361805
## 3    Mastitis 6.713983 0.007895688 96 6.698310 6.729656
## 4       Pfood 6.311561 0.021067570 96 6.269743 6.353380
## 5      PSoilP 5.456069 0.100071724 96 5.257429 5.654710
## 6   PviralDNA 6.191480 0.027919167 96 6.136061 6.246899
## 7     ZymoDNA 6.322968 0.051717793 96 6.220309 6.425627
# Kit means with both confidence limits and t-tests (infer = TRUE)
emmeans(mod.Mycobacterium.best, ~ VariableKit, mode = "df.error") %>%
  summary(infer = TRUE)
##  VariableKit emmean       SE df lower.CL upper.CL t.ratio p.value
##  COREDNA      6.755 0.017146 96    6.721    6.789 393.997 <.0001 
##  EZFood       4.401 0.484098 96    3.440    5.362   9.091 <.0001 
##  Mastitis     6.714 0.007896 96    6.698    6.730 850.336 <.0001 
##  Pfood        6.312 0.021068 96    6.270    6.353 299.587 <.0001 
##  PSoilP       5.456 0.100072 96    5.257    5.655  54.522 <.0001 
##  PviralDNA    6.191 0.027919 96    6.136    6.247 221.764 <.0001 
##  ZymoDNA      6.323 0.051718 96    6.220    6.426 122.259 <.0001 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: df.error 
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data.
# The raw-data columns are selected by name rather than by position (previously
# c(25,42,4)), so the subset cannot silently change if columns are added or
# reordered; these are the three columns the overlay plots map to aesthetics.
mod_df1_Mycobacterium.rawdata <- Mycobacterium.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]

# Model-based kit means and confidence limits (error df) from the chosen model
mod_df2_Mycobacterium.best.model <- emmeans(mod.Mycobacterium.best, ~ VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()

# Quick look: jittered raw values with the model means and intervals on top
ggplot() +
  geom_jitter(
    data = mod_df1_Mycobacterium.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod_df2_Mycobacterium.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod_df2_Mycobacterium.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )

# Making the plot pretty

Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

# Raw values (colour = kit, shape = spike set) with the chosen model's kit
# means, their confidence limits and the CLD letters drawn on top.
# The colour scale is added once only; adding it twice made ggplot2 warn that
# the scale for 'colour' was already present and was being replaced.
ggplot() +
  geom_jitter(
    data = mod_df1_Mycobacterium.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Mycobacterium.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Mycobacterium.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  # Letters are placed slightly above and to the left of each kit mean
  geom_text(
    data = data.frame(mod.Mycobacterium.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  #ylim(3.5, 6.5)+
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  theme_bw() +
  ggtitle("Mycobacterium Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  )


The model that does not assume homoscedasticity and includes VariableKit + SpikeSet + qPCRefficiency was chosen.
qPCRefficiency is forced into all final models.

Formula: mod.Mycobacterium = nlme::gls(LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Mycobacterium.InnOnly, weights = varIdent(form = ~1 | VariableKit))

AIC(mod.Mycobacterium) # best model
-80.071


Other Models for Reference:

mod3.Mycobacterium = nlme::gls(LogCopiespermLofMilk ~ VariableKit, data=Mycobacterium.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod3.Mycobacterium)
185.8061

Previously chosen Linear Model that assumed homoscedasticity for reference:
m_Mycobacterium.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Mycobacterium.InnOnly )
AIC(m_Mycobacterium.LogCopiespermLofMilk3)
27.08939



Manuscript Figures: Mycobacterium


# Mycobacterium: milk data and controls (any NP40InoculatedMilk rows are dropped)
Mycobacterium.Inn.Ctrl <- filter(
  Mycobacterium,
  VariableSampleType != "NP40InoculatedMilk"
)
dim(Mycobacterium.Inn.Ctrl)
## [1] 240  42
# Kit colour palette used by the manuscript figures.
# NOTE(review): "Pfood" is "#D2338B" here but "#D2326B" in the earlier Colors
# definitions of this section -- confirm which value is intended.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2338B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")


# Fix the order of the sample types, which sets the left-to-right facet order
Mycobacterium.Inn.Ctrl$VariableSampleType <- factor(
  Mycobacterium.Inn.Ctrl$VariableSampleType,
  levels = c("InoculatedMilk", "UninoculatedMilk", "NoTemplateControl", "MockCommunity")
)

# One panel per sample type; colour = kit, open shapes = spike set.
# FALSE is spelled out instead of F, which is an ordinary reassignable variable.
ggplot(
  data = Mycobacterium.Inn.Ctrl,
  aes(VariableKit, LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
) +
  scale_shape_discrete(solid = FALSE) +
  ylab("Mycobacterium Log10 Copies / mL of Milk") +
  xlab("Kit") +
  geom_point(
    aes(colour = VariableKit),
    size = 2, stroke = .5,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1),
    show.legend = FALSE
  ) +
  facet_wrap(vars(VariableSampleType), nrow = 1) +
  ggtitle("Mycobacterium DNA Copy Numbers - All Samples and Controls") +
  theme_bw() +
  ylim(0, 9) +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  )
## Warning: Removed 101 rows containing missing values (geom_point).

# Write the figure to a 9 x 3 inch TIFF at 600 dpi; no plot is passed, so
# ggsave() falls back to its default of the last plot displayed.
ggsave(
  filename = "Mycobacterium-AllSamples.TIFF",
  width = 9,
  height = 3,
  units = "in",
  dpi = 600
)
## Warning: Removed 101 rows containing missing values (geom_point).
# Inoculated milk only: raw values per kit, open shapes = spike set, colour = kit.
# FALSE is spelled out instead of F, which is an ordinary reassignable variable.
ggplot(
  Mycobacterium.InnOnly,
  aes(VariableKit, LogCopiespermLofMilk, shape = factor(SpikeSet))
) +
  scale_shape_discrete(solid = FALSE) +
  geom_point(
    aes(colour = VariableKit),
    size = 2, stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1)
  ) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Mycobacterium DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  )

# Plot overlaying model estimates to raw data.
# The raw-data columns are selected by name rather than by position (previously
# c(25,42,4)), so the subset cannot silently change if columns are added or
# reordered; these are the three columns the figure maps to aesthetics.
mod_df1_Mycobacterium.rawdata <- Mycobacterium.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]

# Model-based kit means and confidence limits (error df) from the chosen model
mod_df2_Mycobacterium.best.model <- emmeans(mod.Mycobacterium.best, ~ VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()

# Jittered raw values (colour = kit, open shapes = spike set) with the model
# means, their confidence limits and the CLD letters drawn on top.
# FALSE is spelled out instead of F, which is an ordinary reassignable variable.
ggplot() +
  geom_jitter(
    data = mod_df1_Mycobacterium.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2, stroke = 1, width = .2
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Mycobacterium.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Mycobacterium.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  # Letters are nudged well above each kit mean
  geom_text(
    data = data.frame(mod.Mycobacterium.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 1.2, nudge_x = -0.05, fontface = "bold"
  ) +
  #ylim(3.5, 6.5)+
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Mycobacterium DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  )

# Save the figure just drawn (ggsave() defaults to the last plot displayed)
ggsave("Mycobacterium-Model-Jitter.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)


# Same overlay as the jittered version, but the raw observations are dodged by
# SpikeSet within each kit (dodge.width = 0.5, no random jitter) so replicate
# sets line up in fixed positions.
ggplot() +
  geom_point(
    data = mod_df1_Mycobacterium.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2,
    stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 0.5)
  ) +
  geom_errorbar(
    data = mod_df2_Mycobacterium.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Mycobacterium.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  # Group letters are nudged above (and slightly left of) the estimate.
  geom_text(
    data = data.frame(mod.Mycobacterium.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 1.2,
    nudge_x = -0.05,
    fontface = "bold"
  ) +
  # FALSE spelled out: `F` is an ordinary variable that can be reassigned.
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  # ylim(3.5, 6.5) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Mycobacterium DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  )

# Saves the plot drawn just above (ggsave defaults to the last plot).
ggsave("Mycobacterium-Model.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)

Salmonella


Salmonella sp. Copy Numbers

#Data File: CleanDNAprepData1.18.19
library(ggplot2)
library(dplyr)
library(emmeans)
library(multcompView)

# Keep only the rows of SampleData that belong to the Salmonella assay,
# then report the subset's dimensions (rows, columns).
Salmonella <- filter(SampleData, Assay == "Salmonella sp.")
dim(Salmonella)
## [1] 240  42
# Summary statistics
# For every kit x sample-type combination: mean and standard deviation of the
# log copy number (missing values ignored), the number of missing values, and
# the total row count. The result is converted to a plain data.frame.
Salmonella.summary <- Salmonella %>%
  group_by(VariableKit, VariableSampleType) %>%
  summarize(
    # TRUE spelled out: `T` is an ordinary variable that can be reassigned.
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableKit' (override with `.groups` argument)
# Write the per-kit / per-sample-type summary as a tab-separated file.
write.table(Salmonella.summary, "Salmonella.summary.txt", sep = "\t")

# Box plots of the raw log copy numbers by sample type, one box per kit.
# Named colour palette, one entry per kit.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")

# `z = VariableKit` and `ylab = "Copy Numbers"` were removed from aes(): they
# are not aesthetics used by geom_boxplot, `z` duplicated the colour mapping,
# and the y-axis title is set by the ylab() call below.
ggplot(
  data = Salmonella,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = VariableKit)
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_boxplot(lwd = 1) +
  theme_bw() +
  ggtitle("Salmonella Copy Numbers") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  # NOTE(review): "NP40InoculatedMilk" rows are filtered out when SampleData is
  # built, so this limit appears to reserve an empty slot on the axis - confirm
  # that is intended.
  scale_x_discrete(limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl"))
## Warning: Removed 100 rows containing non-finite values (stat_boxplot).

# Jittered raw Salmonella observations by sample type:
# colour encodes the kit, point symbol encodes the SpikeSet.
ggplot(
  data = Salmonella,
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    color = VariableKit,
    shape = SpikeSet
  )
) +
  geom_jitter(width = 0.25) +
  scale_color_manual(values = Colors) +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Salmonella Copy Numbers") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 100 rows containing missing values (geom_point).

# Jittered raw Salmonella observations by sample type, coloured by SpikeSet
# (default colour scale), with the sample types in a fixed axis order.
ggplot(
  data = Salmonella,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = SpikeSet)
) +
  geom_jitter(width = 0.35) +
  scale_x_discrete(
    limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl")
  ) +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Salmonella Copy Numbers") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 100 rows containing missing values (geom_point).

# Inoculated Milk Data
# Modelling subset: inoculated-milk rows with a log copy number above 0.001.
# filter() keeps only rows where the condition is TRUE, so rows with a missing
# log copy number are dropped here as well.
Salmonella.InnOnly <- Salmonella %>%
  filter(VariableSampleType == "InoculatedMilk", LogCopiespermLofMilk > 0.001)

# Printed summary per sample type, spike set and kit.
# NOTE(review): this groups by VariableSpikeSet while the models below use
# SpikeSet - presumably the same factor; confirm.
Salmonella.InnOnly %>%
  group_by(VariableSampleType, VariableSpikeSet, VariableKit) %>%
  summarize(
    # TRUE spelled out: `T` is an ordinary variable that can be reassigned.
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableSampleType', 'VariableSpikeSet' (override with `.groups` argument)
##    VariableSampleType VariableSpikeSet VariableKit mean_LogCopiespermLofMilk
## 1      InoculatedMilk            First     COREDNA                  6.021926
## 2      InoculatedMilk            First      EZFood                  3.553886
## 3      InoculatedMilk            First    Mastitis                  5.055847
## 4      InoculatedMilk            First       Pfood                  4.427881
## 5      InoculatedMilk            First      PSoilP                  3.313556
## 6      InoculatedMilk            First   PviralDNA                  5.637821
## 7      InoculatedMilk            First     ZymoDNA                  4.600919
## 8      InoculatedMilk           Second     COREDNA                  5.819654
## 9      InoculatedMilk           Second    Mastitis                  5.855133
## 10     InoculatedMilk           Second       Pfood                  5.161440
## 11     InoculatedMilk           Second      PSoilP                  4.265153
## 12     InoculatedMilk           Second   PviralDNA                  5.648986
## 13     InoculatedMilk           Second     ZymoDNA                  5.412725
## 14     InoculatedMilk            Third     COREDNA                  6.381353
## 15     InoculatedMilk            Third      EZFood                  5.491051
## 16     InoculatedMilk            Third    Mastitis                  6.491253
## 17     InoculatedMilk            Third       Pfood                  6.136920
## 18     InoculatedMilk            Third      PSoilP                  5.157145
## 19     InoculatedMilk            Third   PviralDNA                  6.017032
## 20     InoculatedMilk            Third     ZymoDNA                  6.011617
##        st_dev n_missing n_total
## 1  0.03689540         0       6
## 2  0.92148645         0       6
## 3  0.05638093         0       6
## 4  0.02807177         0       6
## 5  0.16097627         0       6
## 6  0.04113447         0       6
## 7  0.09323729         0       6
## 8  0.08889109         0       6
## 9  0.01505745         0       6
## 10 0.07302418         0       6
## 11 0.24970082         0       6
## 12 0.06073656         0       6
## 13 0.02695026         0       6
## 14 0.15232282         0       6
## 15         NA         0       1
## 16 0.03059223         0       6
## 17 0.04606326         0       6
## 18 0.08449168         0       4
## 19 0.10019394         0       6
## 20 0.20713085         0       6

Model Selection

Linear Models

# Three linear models are compared: SpikeSet only, qPCRefficiency only, and
# both as covariates. The best-fitting model is used as the final model.
# Candidate 1: kit effect adjusted for SpikeSet.
m_Salmonella.LogCopiespermLofMilk1 <- lm( LogCopiespermLofMilk ~ VariableKit + SpikeSet, data=Salmonella.InnOnly )
summary(m_Salmonella.LogCopiespermLofMilk1)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Salmonella.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.83872 -0.21061 -0.01782  0.19011  0.62629 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           5.49384    0.09486  57.913  < 2e-16 ***
## VariableKitEZFood    -1.83557    0.16017 -11.460  < 2e-16 ***
## VariableKitMastitis  -0.27357    0.11654  -2.347  0.02080 *  
## VariableKitPfood     -0.83223    0.11654  -7.141 1.30e-10 ***
## VariableKitPSoilP    -1.86476    0.12028 -15.504  < 2e-16 ***
## VariableKitPviralDNA -0.30636    0.11654  -2.629  0.00987 ** 
## VariableKitZymoDNA   -0.73256    0.11654  -6.286 7.77e-09 ***
## SpikeSetSecond        0.53492    0.08194   6.528 2.49e-09 ***
## SpikeSetThird         1.20649    0.08176  14.757  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3496 on 104 degrees of freedom
## Multiple R-squared:  0.8671, Adjusted R-squared:  0.8569 
## F-statistic: 84.82 on 8 and 104 DF,  p-value: < 2.2e-16
# Candidate 2: kit effect adjusted for qPCRefficiency (no SpikeSet term).
m_Salmonella.LogCopiespermLofMilk2 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency, data=Salmonella.InnOnly )
summary(m_Salmonella.LogCopiespermLofMilk2)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency, 
##     data = Salmonella.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.88308 -0.41227  0.08769  0.33440  0.89243 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           7.86150    0.20697  37.983  < 2e-16 ***
## VariableKitEZFood    -1.73247    0.20215  -8.570 9.68e-14 ***
## VariableKitMastitis   0.05774    0.15006   0.385  0.70118    
## VariableKitPfood     -0.50093    0.15006  -3.338  0.00117 ** 
## VariableKitPSoilP    -1.53819    0.15620  -9.847  < 2e-16 ***
## VariableKitPviralDNA -0.30636    0.14633  -2.094  0.03870 *  
## VariableKitZymoDNA   -0.40125    0.15006  -2.674  0.00869 ** 
## qPCRefficiency       -2.83325    0.28417  -9.970  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.439 on 105 degrees of freedom
## Multiple R-squared:  0.7885, Adjusted R-squared:  0.7744 
## F-statistic: 55.91 on 7 and 105 DF,  p-value: < 2.2e-16
# Candidate 3: kit effect adjusted for both qPCRefficiency and SpikeSet.
m_Salmonella.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Salmonella.InnOnly )
summary(m_Salmonella.LogCopiespermLofMilk3)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = Salmonella.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.78830 -0.07483 -0.00161  0.10882  0.67589 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           7.55018    0.21269  35.499  < 2e-16 ***
## VariableKitEZFood    -1.28789    0.12564 -10.251  < 2e-16 ***
## VariableKitMastitis   0.08887    0.08996   0.988 0.325513    
## VariableKitPfood     -0.46979    0.08996  -5.222 9.25e-07 ***
## VariableKitPSoilP    -1.48765    0.09296 -16.002  < 2e-16 ***
## VariableKitPviralDNA -0.30636    0.08263  -3.707 0.000339 ***
## VariableKitZymoDNA   -0.37012    0.08996  -4.114 7.84e-05 ***
## qPCRefficiency       -3.09952    0.30413 -10.191  < 2e-16 ***
## SpikeSetSecond        0.85860    0.06621  12.967  < 2e-16 ***
## SpikeSetThird         0.57925    0.08455   6.851 5.49e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2479 on 103 degrees of freedom
## Multiple R-squared:  0.9338, Adjusted R-squared:  0.928 
## F-statistic: 161.5 on 9 and 103 DF,  p-value: < 2.2e-16
# The model with both qPCRefficiency and SpikeSet fits better than the model
# with SpikeSet only.
# NOTE(review): anova() compares each model with the one listed before it and
# assumes they are nested. Models 1 and 2 are not nested (neither formula
# contains the other), which is why row 2 of the table below shows Df = -1 and
# a negative Sum of Sq; that row's F test is not interpretable. The valid
# nested comparisons are model 1 vs model 3 and model 2 vs model 3; the AIC
# values further down can be compared across all three lm fits.
anova(m_Salmonella.LogCopiespermLofMilk1, m_Salmonella.LogCopiespermLofMilk2, m_Salmonella.LogCopiespermLofMilk3)
## Analysis of Variance Table
## 
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    104 12.713                                  
## 2    105 20.235 -1   -7.5219 122.39 < 2.2e-16 ***
## 3    103  6.330  2   13.9051 113.13 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# AIC of the three candidate lm fits (all fitted to Salmonella.InnOnly);
# lower is better.
AIC(m_Salmonella.LogCopiespermLofMilk1)
## [1] 93.80425
AIC(m_Salmonella.LogCopiespermLofMilk2)
## [1] 144.3238
AIC(m_Salmonella.LogCopiespermLofMilk3)
## [1] 17.00478
# Final model chosen (lowest AIC of the three):
# Model 3:  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
# m_Salmonella.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Salmonella.InnOnly )

Mixed effects models - for reference

library(lme4)
library(lmerTest)

# Reference mixed model: kit as the fixed effect, random intercept for each
# SpikeSet, estimated by REML.
model1 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet),
  data = Salmonella.InnOnly,
  REML = TRUE
)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
##    Data: Salmonella.InnOnly
## 
## REML criterion at convergence: 106.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2635 -0.6174 -0.0525  0.5580  1.8184 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  SpikeSet (Intercept) 0.3621   0.6018  
##  Residual             0.1222   0.3496  
## Number of obs: 113, groups:  SpikeSet, 3
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)            6.0743     0.3571   2.1906  17.011  0.00228 ** 
## VariableKitEZFood     -1.8392     0.1601 104.1022 -11.486  < 2e-16 ***
## VariableKitMastitis   -0.2736     0.1165 104.0004  -2.347  0.02080 *  
## VariableKitPfood      -0.8322     0.1165 104.0004  -7.141 1.30e-10 ***
## VariableKitPSoilP     -1.8655     0.1203 104.0052 -15.510  < 2e-16 ***
## VariableKitPviralDNA  -0.3064     0.1165 104.0004  -2.629  0.00987 ** 
## VariableKitZymoDNA    -0.7326     0.1165 104.0004  -6.286 7.77e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VarblKtEZFd -0.119                                   
## VrblKtMstts -0.163  0.364                            
## VariblKtPfd -0.163  0.364  0.500                     
## VarblKtPSlP -0.158  0.357  0.484  0.484              
## VrblKtPvDNA -0.163  0.364  0.500  0.500  0.484       
## VrblKtZyDNA -0.163  0.364  0.500  0.500  0.484  0.500
# Reference mixed model with qPCRefficiency added as a fixed covariate;
# random intercept for each SpikeSet, estimated by REML.
model2 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet),
  data = Salmonella.InnOnly,
  REML = TRUE
)
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
##    Data: Salmonella.InnOnly
## 
## REML criterion at convergence: 33.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.2179 -0.3065 -0.0233  0.4196  2.7233 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  SpikeSet (Intercept) 0.19217  0.4384  
##  Residual             0.06144  0.2479  
## Number of obs: 113, groups:  SpikeSet, 3
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            8.02377    0.32161   4.95852  24.949 2.10e-06 ***
## VariableKitEZFood     -1.29290    0.12534 103.84473 -10.315  < 2e-16 ***
## VariableKitMastitis    0.08782    0.08979 103.69083   0.978 0.330345    
## VariableKitPfood      -0.47085    0.08979 103.69083  -5.244 8.36e-07 ***
## VariableKitPSoilP     -1.48885    0.09281 103.62123 -16.042  < 2e-16 ***
## VariableKitPviralDNA  -0.30636    0.08263 103.04345  -3.708 0.000339 ***
## VariableKitZymoDNA    -0.37117    0.08979 103.69083  -4.134 7.26e-05 ***
## qPCRefficiency        -3.09049    0.30063 104.91698 -10.280  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA
## VarblKtEZFd  0.165                                          
## VrblKtMstts  0.113  0.469                                   
## VariblKtPfd  0.113  0.469  0.577                            
## VarblKtPSlP  0.118  0.464  0.564  0.564                     
## VrblKtPvDNA -0.128  0.330  0.460  0.460  0.445              
## VrblKtZyDNA  0.113  0.469  0.577  0.577  0.564  0.460       
## qPCReffcncy -0.590 -0.424 -0.391 -0.391 -0.395  0.000 -0.391
# Likelihood-ratio comparison of the two mixed models. As the message below
# reports, anova() refits both with ML before comparing them.
anova(model1, model2)
## refitting model(s) with ML (instead of REML)
## Data: Salmonella.InnOnly
## Models:
## model1: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## model2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
##        npar     AIC     BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## model1    9 107.958 132.504 -44.979   89.958                         
## model2   10  31.314  58.588  -5.657   11.314 78.643  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# AIC of the two mixed models as fitted (REML).
# NOTE(review): these values come from the REML fits, so they differ from the
# ML-based AIC column printed by anova(model1, model2). The two models have
# different fixed effects, so the ML-based values are presumably the ones that
# should be compared - confirm.
AIC (model1)
## [1] 124.5633
AIC (model2)
## [1] 53.32024

Final Model

# Final linear model: same formula and data as candidate model 3
# (kit + qPCRefficiency + SpikeSet), stored under its own name for the
# diagnostics and estimated marginal means that follow.
m_Salmonella.LogCopiespermLofMilk <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Salmonella.InnOnly )
summary(m_Salmonella.LogCopiespermLofMilk)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = Salmonella.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.78830 -0.07483 -0.00161  0.10882  0.67589 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           7.55018    0.21269  35.499  < 2e-16 ***
## VariableKitEZFood    -1.28789    0.12564 -10.251  < 2e-16 ***
## VariableKitMastitis   0.08887    0.08996   0.988 0.325513    
## VariableKitPfood     -0.46979    0.08996  -5.222 9.25e-07 ***
## VariableKitPSoilP    -1.48765    0.09296 -16.002  < 2e-16 ***
## VariableKitPviralDNA -0.30636    0.08263  -3.707 0.000339 ***
## VariableKitZymoDNA   -0.37012    0.08996  -4.114 7.84e-05 ***
## qPCRefficiency       -3.09952    0.30413 -10.191  < 2e-16 ***
## SpikeSetSecond        0.85860    0.06621  12.967  < 2e-16 ***
## SpikeSetThird         0.57925    0.08455   6.851 5.49e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2479 on 103 degrees of freedom
## Multiple R-squared:  0.9338, Adjusted R-squared:  0.928 
## F-statistic: 161.5 on 9 and 103 DF,  p-value: < 2.2e-16
# Residuals vs predicted values for the final linear model, base graphics.
plot(
  x = predict(m_Salmonella.LogCopiespermLofMilk),
  y = resid(m_Salmonella.LogCopiespermLofMilk)
)

# Same diagnostic with ggplot2, coloured by kit.
# Named colour palette, one entry per kit.
Colors <- c(
  "COREDNA" = "#4DB3C7",
  "EZFood" = "#85CA46",
  "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B",
  "PSoilP" = "#1D6E9B",
  "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)

# The fitted lm object itself is passed as the plot data; the horizontal
# reference lines mark residuals of +1 and -1.
ggplot(
  m_Salmonella.LogCopiespermLofMilk,
  aes(
    x = predict(m_Salmonella.LogCopiespermLofMilk),
    y = resid(m_Salmonella.LogCopiespermLofMilk),
    color = VariableKit
  )
) +
  geom_point() +
  geom_hline(yintercept = 1) +
  geom_hline(yintercept = -1) +
  scale_color_manual(values = Colors) +
  ggtitle("Salmonella Innoculated Only - Model Fit - Residuals vs Predicted") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  )

  # Normal Q-Q plot of the final model's residuals. qqline() adds its reference
  # line to the plot opened by qqnorm(), so the two calls must stay in this order.
qqnorm(resid(m_Salmonella.LogCopiespermLofMilk))
qqline(resid(m_Salmonella.LogCopiespermLofMilk))

# Re-print the final model summary next to the diagnostics.
summary(m_Salmonella.LogCopiespermLofMilk)
## 
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + 
##     SpikeSet, data = Salmonella.InnOnly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.78830 -0.07483 -0.00161  0.10882  0.67589 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           7.55018    0.21269  35.499  < 2e-16 ***
## VariableKitEZFood    -1.28789    0.12564 -10.251  < 2e-16 ***
## VariableKitMastitis   0.08887    0.08996   0.988 0.325513    
## VariableKitPfood     -0.46979    0.08996  -5.222 9.25e-07 ***
## VariableKitPSoilP    -1.48765    0.09296 -16.002  < 2e-16 ***
## VariableKitPviralDNA -0.30636    0.08263  -3.707 0.000339 ***
## VariableKitZymoDNA   -0.37012    0.08996  -4.114 7.84e-05 ***
## qPCRefficiency       -3.09952    0.30413 -10.191  < 2e-16 ***
## SpikeSetSecond        0.85860    0.06621  12.967  < 2e-16 ***
## SpikeSetThird         0.57925    0.08455   6.851 5.49e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2479 on 103 degrees of freedom
## Multiple R-squared:  0.9338, Adjusted R-squared:  0.928 
## F-statistic: 161.5 on 9 and 103 DF,  p-value: < 2.2e-16
# Only one large residual (absolute value above 1) was identified, and it
# belonged to EZFood.

# Store the final model's residuals alongside the modelling data, then count,
# per kit, the observations whose residual lies outside [-1, 1].
Salmonella.InnOnly[["resid"]] <- residuals(m_Salmonella.LogCopiespermLofMilk)
Salmonella.InnOnly %>%
  filter(resid > 1 | resid < -1) %>%
  select(VariableKit, resid) %>%
  group_by(VariableKit) %>%
  summarize(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 1 x 2
##   VariableKit     n
##   <chr>       <int>
## 1 EZFood          1
# Estimated marginal means per kit plus all pairwise kit contrasts
# (Tukey-adjusted, see the printed table) from the final linear model.
m_Salmonella.LogCopiespermLofMilk_emmeans <- emmeans(
  m_Salmonella.LogCopiespermLofMilk,
  pairwise ~ VariableKit
)
# Compact letter display of the kit means: kits sharing a letter are not
# significantly different. Upper-case letters are used as group labels.
m_Salmonella.LogCopiespermLofMilk_cld <- CLD(
  m_Salmonella.LogCopiespermLofMilk_emmeans[["emmeans"]],
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
m_Salmonella.LogCopiespermLofMilk_cld
##  VariableKit emmean     SE  df lower.CL upper.CL .group
##  PSoilP        4.32 0.0630 103     4.19     4.44  A    
##  EZFood        4.52 0.1011 103     4.32     4.72  A    
##  Pfood         5.33 0.0591 103     5.22     5.45   B   
##  ZymoDNA       5.43 0.0591 103     5.32     5.55   B   
##  PviralDNA     5.50 0.0642 103     5.37     5.62   B   
##  COREDNA       5.80 0.0642 103     5.68     5.93    C  
##  Mastitis      5.89 0.0591 103     5.78     6.01    C  
## 
## Results are averaged over the levels of: SpikeSet 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05
# Per-kit estimated marginal means of the final linear model as a plain data
# frame, printed so the values can be plotted with other software.
data.frame(
  summary(
    emmeans(m_Salmonella.LogCopiespermLofMilk, ~VariableKit)
  )
)
##   VariableKit   emmean         SE  df lower.CL upper.CL
## 1     COREDNA 5.803758 0.06417945 103 5.676474 5.931043
## 2      EZFood 4.515871 0.10110663 103 4.315350 4.716392
## 3    Mastitis 5.892633 0.05912317 103 5.775376 6.009890
## 4       Pfood 5.333969 0.05912317 103 5.216712 5.451226
## 5      PSoilP 4.316106 0.06299397 103 4.191172 4.441039
## 6   PviralDNA 5.497394 0.06417945 103 5.370109 5.624679
## 7     ZymoDNA 5.433642 0.05912317 103 5.316386 5.550899
# Plot the per-kit estimated marginal means of the final linear model with
# their lower.CL/upper.CL intervals and compact-letter group labels.
# Named colour palette, one entry per kit.
Colors <- c(
  "COREDNA" = "#4DB3C7",
  "EZFood" = "#85CA46",
  "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B",
  "PSoilP" = "#1D6E9B",
  "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
emmeans(m_Salmonella.LogCopiespermLofMilk, ~VariableKit) %>%
  summary() %>%
  data.frame() %>%
  ggplot(aes(x = VariableKit, y = emmean, color = VariableKit)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.5) +
  # Letters come from the CLD table; y and colour are inherited from the plot.
  geom_text(
    data = data.frame(m_Salmonella.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`),
    hjust = -0.1
  ) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  labs(y = "Estimated Marginal Means") +
  ggtitle("Salmonella Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )

Salmonella - Model not assuming homoscedasticity and simpler model

# Approach taken from
# https://cran.r-project.org/web/packages/emmeans/vignettes/FAQs.html#contents

library(nlme)
# Mean structure is the one chosen above:
# Model 3:  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
# m_Salmonella.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Salmonella.InnOnly )

# Generalized least squares fit of that model with a separate residual
# standard deviation for each kit (varIdent stratified by VariableKit),
# i.e. without assuming equal variances across kits.
mod.Salmonella <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Salmonella.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod.Salmonella)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet 
##   Data: Salmonella.InnOnly 
##         AIC       BIC   logLik
##   -67.22324 -22.43285 50.61162
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##   COREDNA    EZFood  Mastitis     Pfood    PSoilP PviralDNA   ZymoDNA 
## 1.0000000 8.1183574 0.3580213 1.5164933 2.2489849 1.2699044 1.1815861 
## 
## Coefficients:
##                          Value Std.Error   t-value p-value
## (Intercept)           7.439099 0.0948369  78.44094  0.0000
## VariableKitEZFood    -1.363513 0.3273561  -4.16523  0.0001
## VariableKitMastitis   0.061077 0.0305593   1.99864  0.0483
## VariableKitPfood     -0.497587 0.0478911 -10.38997  0.0000
## VariableKitPSoilP    -1.522134 0.0665349 -22.87723  0.0000
## VariableKitPviralDNA -0.306365 0.0404458  -7.57470  0.0000
## VariableKitZymoDNA   -0.397914 0.0415666  -9.57292  0.0000
## qPCRefficiency       -2.861799 0.1289834 -22.18735  0.0000
## SpikeSetSecond        0.784978 0.0194802  40.29625  0.0000
## SpikeSetThird         0.536250 0.0387801  13.82796  0.0000
## 
##  Correlation: 
##                      (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA qPCRff
## VariableKitEZFood     0.031                                                 
## VariableKitMastitis   0.257  0.091                                          
## VariableKitPfood      0.164  0.058  0.583                                   
## VariableKitPSoilP     0.121  0.042  0.422  0.269                            
## VariableKitPviralDNA -0.163  0.047  0.507  0.323  0.233                     
## VariableKitZymoDNA    0.189  0.067  0.672  0.429  0.310  0.372              
## qPCRefficiency       -0.958 -0.057 -0.494 -0.315 -0.231  0.000 -0.363       
## SpikeSetSecond        0.178  0.040  0.140  0.090  0.066  0.000  0.103 -0.284
## SpikeSetThird        -0.887 -0.040 -0.432 -0.276 -0.194  0.000 -0.318  0.876
##                      SpkStS
## VariableKitEZFood          
## VariableKitMastitis        
## VariableKitPfood           
## VariableKitPSoilP          
## VariableKitPviralDNA       
## VariableKitZymoDNA         
## qPCRefficiency             
## SpikeSetSecond             
## SpikeSetThird        -0.018
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.1463772 -0.6589410 -0.1011567  0.6409211  3.0768720 
## 
## Residual standard error: 0.1061618 
## Degrees of freedom: 113 total; 103 residual
# AIC of the equal-variance lm fit vs the per-kit-variance gls fit.
# NOTE(review): the gls summary above reports a REML fit, whereas AIC() on an
# lm object is based on the ordinary (ML) log-likelihood, so these two values
# are presumably not on the same scale. Confirm the ranking by refitting the
# gls model with method = "ML" (or comparing two gls fits) before relying on it.
AIC(m_Salmonella.LogCopiespermLofMilk3)
## [1] 17.00478
AIC(mod.Salmonella)
## [1] -67.22324
# Simpler alternative: kit as the only predictor (no qPCRefficiency or
# SpikeSet terms), still with a separate residual standard deviation per kit.

mod3.Salmonella <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit,
  data = Salmonella.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod3.Salmonella)
## Generalized least squares fit by REML
##   Model: LogCopiespermLofMilk ~ VariableKit 
##   Data: Salmonella.InnOnly 
##        AIC      BIC   logLik
##   194.3074 231.5955 -83.1537
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | VariableKit 
##  Parameter estimates:
##   COREDNA    EZFood  Mastitis     Pfood    PSoilP PviralDNA   ZymoDNA 
## 1.0000000 4.3188465 2.3442877 2.7961189 2.9648645 0.7489734 2.3532494 
## 
## Coefficients:
##                          Value Std.Error  t-value p-value
## (Intercept)           6.074311 0.0608632 99.80266  0.0000
## VariableKitEZFood    -2.243688 0.4258834 -5.26831  0.0000
## VariableKitMastitis  -0.273567 0.1551199 -1.76358  0.0807
## VariableKitPfood     -0.832231 0.1807369 -4.60465  0.0000
## VariableKitPSoilP    -1.943009 0.2008415 -9.67434  0.0000
## VariableKitPviralDNA -0.306365 0.0760416 -4.02891  0.0001
## VariableKitZymoDNA   -0.732557 0.1556217 -4.70730  0.0000
## 
##  Correlation: 
##                      (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VariableKitEZFood    -0.143                                   
## VariableKitMastitis  -0.392  0.056                            
## VariableKitPfood     -0.337  0.048  0.132                     
## VariableKitPSoilP    -0.303  0.043  0.119  0.102              
## VariableKitPviralDNA -0.800  0.114  0.314  0.270  0.243       
## VariableKitZymoDNA   -0.391  0.056  0.153  0.132  0.119  0.313
## 
## Standardized residuals:
##          Min           Q1          Med           Q3          Max 
## -1.803309694 -0.926163281  0.002337255  0.888570595  1.895109185 
## 
## Residual standard error: 0.2582208 
## Degrees of freedom: 113 total; 106 residual
# AIC of the three candidates: equal-variance lm, full gls, kit-only gls.
# NOTE(review): both gls summaries above report REML fits, and the two gls
# models have different fixed effects, while the lm AIC is ML-based. The three
# values are therefore presumably not directly comparable; confirm the ranking
# with method = "ML" refits.
AIC(m_Salmonella.LogCopiespermLofMilk3)
## [1] 17.00478
AIC(mod.Salmonella)
## [1] -67.22324
AIC(mod3.Salmonella) #mod.Salmonella is best model (including qPCRefficiency and SpikeSet)
## [1] 194.3074
# mod.Salmonella (no homoscedasticity assumption, with qPCRefficiency and
# SpikeSet) has by far the lowest AIC of the alternatives.

# Alias for the selected model.
mod.Salmonella.best <- mod.Salmonella

Final Salmonella Figure

# Estimated marginal means per kit together with all pairwise kit contrasts
mod.Salmonella_emmeans <- emmeans(
  mod.Salmonella,
  specs = pairwise ~ VariableKit
)
# Compact letter display of the kit means: sorted, with the pairwise
# comparison table attached (details = TRUE)
mod.Salmonella_cld <- CLD(
  mod.Salmonella_emmeans[["emmeans"]],
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Letters-only version (default options), used later to label the figure
mod.Salmonella_cld_letters <- CLD(
  mod.Salmonella_emmeans[["emmeans"]],
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Salmonella_cld
## $emmeans
##  VariableKit emmean       SE    df lower.CL upper.CL .group
##  PSoilP       4.302 0.059846 15.05    4.175    4.430  A    
##  EZFood       4.461 0.325949  6.03    3.664    5.258  ABC  
##  Pfood        5.327 0.038139 15.59    5.246    5.408   B   
##  ZymoDNA      5.427 0.029813 19.29    5.364    5.489   BC  
##  PviralDNA    5.518 0.033712 16.80    5.447    5.589    C  
##  COREDNA      5.825 0.027439 19.66    5.767    5.882     D 
##  Mastitis     5.886 0.009741 20.00    5.865    5.906     D 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: satterthwaite 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05 
## 
## $comparisons
##  contrast             estimate     SE    df t.ratio p.value
##  EZFood - PSoilP        0.1586 0.3313  6.43  0.479  0.9983 
##  Pfood - PSoilP         1.0245 0.0707 24.98 14.483  <.0001 
##  Pfood - EZFood         0.8659 0.3281  6.18  2.639  0.2547 
##  ZymoDNA - PSoilP       1.1242 0.0666 22.11 16.874  <.0001 
##  ZymoDNA - EZFood       0.9656 0.3272  6.12  2.951  0.1818 
##  ZymoDNA - Pfood        0.0997 0.0481 31.09  2.072  0.3931 
##  PviralDNA - PSoilP     1.2158 0.0694 25.01 17.529  <.0001 
##  PviralDNA - EZFood     1.0571 0.3279  6.17  3.224  0.1335 
##  PviralDNA - Pfood      0.1912 0.0517 36.50  3.696  0.0116 
##  PviralDNA - ZymoDNA    0.0915 0.0459 34.29  1.992  0.4376 
##  COREDNA - PSoilP       1.5221 0.0665 22.13 22.877  <.0001 
##  COREDNA - EZFood       1.3635 0.3274  6.13  4.165  0.0486 
##  COREDNA - Pfood        0.4976 0.0479 31.21 10.390  <.0001 
##  COREDNA - ZymoDNA      0.3979 0.0416 37.15  9.573  <.0001 
##  COREDNA - PviralDNA    0.3064 0.0404 29.17  7.575  <.0001 
##  Mastitis - PSoilP      1.5832 0.0604 15.59 26.224  <.0001 
##  Mastitis - EZFood      1.4246 0.3260  6.03  4.370  0.0404 
##  Mastitis - Pfood       0.5587 0.0390 16.94 14.328  <.0001 
##  Mastitis - ZymoDNA     0.4590 0.0309 22.01 14.857  <.0001 
##  Mastitis - PviralDNA   0.3674 0.0363 21.07 10.123  <.0001 
##  Mastitis - COREDNA     0.0611 0.0306 25.49  1.999  0.4395 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: satterthwaite 
## P value adjustment: tukey method for comparing a family of 7 estimates
# Kit-level estimated marginal means as a plain data frame,
# convenient for exporting to other plotting software
data.frame(
  summary(
    emmeans(mod.Salmonella, specs = ~ VariableKit)
  )
)
##   VariableKit   emmean          SE        df lower.CL upper.CL
## 1     COREDNA 5.824509 0.027438825 19.661528 5.767209 5.881808
## 2      EZFood 4.460996 0.325948885  6.025485 3.664245 5.257747
## 3    Mastitis 5.885586 0.009740565 19.997508 5.865267 5.905904
## 4       Pfood 5.326922 0.038138735 15.588851 5.245897 5.407946
## 5      PSoilP 4.302375 0.059845536 15.053862 4.174857 4.429893
## 6   PviralDNA 5.518144 0.033711890 16.795573 5.446952 5.589336
## 7     ZymoDNA 5.426595 0.029812567 19.289759 5.364260 5.488930
# Kit estimates with confidence limits and t tests (infer = TRUE)
emmeans(mod.Salmonella, specs = ~ VariableKit) %>%
  summary(infer = TRUE)
##  VariableKit emmean       SE    df lower.CL upper.CL t.ratio p.value
##  COREDNA      5.825 0.027439 19.66    5.767    5.882 212.273 <.0001 
##  EZFood       4.461 0.325949  6.03    3.664    5.258  13.686 <.0001 
##  Mastitis     5.886 0.009741 20.00    5.865    5.906 604.234 <.0001 
##  Pfood        5.327 0.038139 15.59    5.246    5.408 139.672 <.0001 
##  PSoilP       4.302 0.059846 15.05    4.175    4.430  71.891 <.0001 
##  PviralDNA    5.518 0.033712 16.80    5.447    5.589 163.685 <.0001 
##  ZymoDNA      5.427 0.029813 19.29    5.364    5.489 182.024 <.0001 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: satterthwaite 
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data

# Raw observations: keep only the columns the plots map to aesthetics.
# Columns are selected by name rather than by position (previously
# c(25, 42, 4)) so the subset does not silently change if the input file
# gains, loses or reorders columns.
# NOTE(review): assumes positions 25, 42 and 4 were exactly these three
# columns - they are the only ones the plots below reference; confirm.
mod.Salmonella_df1_Salmonella.rawdata <- Salmonella.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]

# Model-based kit means with their confidence limits, as a data frame
mod.Salmonella_df2_Salmonella.model <- emmeans(mod.Salmonella, ~VariableKit) %>%
  summary() %>%
  data.frame()

# Quick diagnostic plot: jittered raw values, model means and CL error bars
ggplot() +
  geom_jitter(
    data = mod.Salmonella_df1_Salmonella.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod.Salmonella_df2_Salmonella.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod.Salmonella_df2_Salmonella.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )

# Making the plot pretty

# One fixed colour per extraction kit
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")


# Raw data (coloured by kit, shaped by spike set) overlaid with the model
# means, their confidence limits and the compact-letter-display groups.
# scale_color_manual() is added exactly once: the original chain added it
# twice, which made ggplot2 replace the first scale and emit
# "Scale for 'colour' is already present".
ggplot() +
  geom_jitter(
    data = mod.Salmonella_df1_Salmonella.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  geom_errorbar(
    data = mod.Salmonella_df2_Salmonella.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  ) +
  geom_point(
    data = mod.Salmonella_df2_Salmonella.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  # Group letters sit slightly above and to the left of each model mean
  geom_text(
    data = data.frame(mod.Salmonella_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  scale_color_manual(values = Colors) +
  # Fixed kit order on the x axis
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")) +
  ylim(1.5, 7.0) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Salmonella Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )

### Salmonella Figure

library(emmeans)
# Kit-level estimated marginal means and pairwise contrasts for the selected
# model, requesting the "df.error" degrees-of-freedom mode
mod.Salmonella.best_emmeans <- emmeans(
  mod.Salmonella.best,
  specs = pairwise ~ VariableKit,
  mode = "df.error"
)

# Compact letter display of the kit means: sorted, with the pairwise
# comparison table attached (details = TRUE)
mod.Salmonella.best_cld <- CLD(
  mod.Salmonella.best_emmeans[["emmeans"]],
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Letters-only version (default options), used to label the final figures
mod.Salmonella.best_cld_letters <- CLD(
  mod.Salmonella.best_emmeans[["emmeans"]],
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Salmonella.best_cld_letters
##  VariableKit emmean       SE df lower.CL upper.CL .group
##  PSoilP       4.302 0.059846 96    4.184    4.421  A    
##  EZFood       4.461 0.325949 96    3.814    5.108  AB   
##  Pfood        5.327 0.038139 96    5.251    5.403   B   
##  ZymoDNA      5.427 0.029813 96    5.367    5.486   BC  
##  PviralDNA    5.518 0.033712 96    5.451    5.585    C  
##  COREDNA      5.825 0.027439 96    5.770    5.879     D 
##  Mastitis     5.886 0.009741 96    5.866    5.905     D 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: df.error 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 7 estimates 
## significance level used: alpha = 0.05
# Kit-level estimated marginal means of the selected model as a plain data
# frame, convenient for exporting to other plotting software
data.frame(
  summary(
    emmeans(mod.Salmonella.best, specs = ~ VariableKit, mode = "df.error")
  )
)
##   VariableKit   emmean          SE df lower.CL upper.CL
## 1     COREDNA 5.824509 0.027438825 96 5.770043 5.878974
## 2      EZFood 4.460996 0.325948885 96 3.813992 5.107999
## 3    Mastitis 5.885586 0.009740565 96 5.866251 5.904920
## 4       Pfood 5.326922 0.038138735 96 5.251217 5.402626
## 5      PSoilP 4.302375 0.059845536 96 4.183582 4.421167
## 6   PviralDNA 5.518144 0.033711890 96 5.451226 5.585062
## 7     ZymoDNA 5.426595 0.029812567 96 5.367418 5.485772
# Kit estimates of the selected model with confidence limits and t tests
emmeans(mod.Salmonella.best, specs = ~ VariableKit, mode = "df.error") %>%
  summary(infer = TRUE)
##  VariableKit emmean       SE df lower.CL upper.CL t.ratio p.value
##  COREDNA      5.825 0.027439 96    5.770    5.879 212.273 <.0001 
##  EZFood       4.461 0.325949 96    3.814    5.108  13.686 <.0001 
##  Mastitis     5.886 0.009741 96    5.866    5.905 604.234 <.0001 
##  Pfood        5.327 0.038139 96    5.251    5.403 139.672 <.0001 
##  PSoilP       4.302 0.059846 96    4.184    4.421  71.891 <.0001 
##  PviralDNA    5.518 0.033712 96    5.451    5.585 163.685 <.0001 
##  ZymoDNA      5.427 0.029813 96    5.367    5.486 182.024 <.0001 
## 
## Results are averaged over the levels of: SpikeSet 
## Degrees-of-freedom method: df.error 
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data

# Raw observations: keep only the columns the plots map to aesthetics.
# Columns are selected by name rather than by position (previously
# c(25, 42, 4)) so the subset does not silently change if the input file
# gains, loses or reorders columns.
# NOTE(review): assumes positions 25, 42 and 4 were exactly these three
# columns - they are the only ones the plots below reference; confirm.
mod_df1_Salmonella.rawdata <- Salmonella.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]

# Model-based kit means of the selected model with their confidence limits
mod_df2_Salmonella.best.model <- emmeans(mod.Salmonella.best, ~VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()

# Quick diagnostic plot: jittered raw values, model means and CL error bars
ggplot() +
  geom_jitter(
    data = mod_df1_Salmonella.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod_df2_Salmonella.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod_df2_Salmonella.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )

# Making the plot pretty

# One fixed colour per extraction kit
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")

# Raw data (coloured by kit, shaped by spike set) overlaid with the selected
# model's means, their confidence limits and the compact-letter-display groups.
# scale_color_manual() is added exactly once: the original chain added it
# twice, which made ggplot2 replace the first scale and emit
# "Scale for 'colour' is already present".
ggplot() +
  geom_jitter(
    data = mod_df1_Salmonella.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  geom_errorbar(
    data = mod_df2_Salmonella.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Salmonella.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  # Group letters sit slightly above and to the left of each model mean
  geom_text(
    data = data.frame(mod.Salmonella.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  scale_color_manual(values = Colors) +
  # Fixed kit order on the x axis
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")) +
  #ylim(3.5, 6.5)+
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Salmonella Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )



The chosen model does not assume homoscedasticity and includes VariableKit + SpikeSet + qPCRefficiency as predictors.
qPCRefficiency is forced into all final models.

Formula: mod.Salmonella = nlme::gls(LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Salmonella.InnOnly, weights = varIdent(form = ~1 | VariableKit))

AIC(mod.Salmonella)
-67.22324 # best model



Other Models for Reference:

Formula: mod3.Salmonella = nlme::gls(LogCopiespermLofMilk ~ VariableKit, data=Salmonella.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod3.Salmonella)
194.3074

Previously chosen Linear Model that assumed homoscedasticity for reference:
Formula: m_Salmonella.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Salmonella.InnOnly )
AIC(m_Salmonella.LogCopiespermLofMilk3)
17.00478



Manuscript Figures: Salmonella


# Salmonella: Milk Data and Controls
# Drop the NP40-inoculated milk samples; all other sample types are kept.
Salmonella.Inn.Ctrl <- Salmonella %>% filter(VariableSampleType!="NP40InoculatedMilk")
dim(Salmonella.Inn.Ctrl)
## [1] 240  42
# One fixed colour per extraction kit.
# NOTE(review): Pfood is "#D2338B" here but "#D2326B" in the earlier Colors
# definitions in this file - confirm which value is intended.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2338B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")


# Fix the facet (panel) order of the sample types
Salmonella.Inn.Ctrl$VariableSampleType <- factor(
  Salmonella.Inn.Ctrl$VariableSampleType,
  levels = c("InoculatedMilk", "UninoculatedMilk", "NoTemplateControl", "MockCommunity")
)

# One panel per sample type; points coloured by kit and dodged by spike set.
# The plot is stored in a variable so ggsave() below receives it explicitly
# instead of depending on whichever plot happened to be displayed last.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
salmonella_all_samples_plot <- ggplot(
  data = Salmonella.Inn.Ctrl,
  aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
) +
  scale_shape_discrete(solid = FALSE) +
  ylab("Salmonella Log10 Copies / mL of Milk") +
  xlab("Kit") +
  # Colour is inherited from the plot-level mapping
  geom_point(
    size = 2, stroke = .5,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1),
    show.legend = FALSE
  ) +
  facet_wrap(vars(VariableSampleType), nrow = 1) +
  ggtitle("Salmonella DNA Copy Numbers - All Samples and Controls") +
  theme_bw() +
  ylim(0, 9) +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  # Fixed kit order on the x axis
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
salmonella_all_samples_plot
## Warning: Removed 100 rows containing missing values (geom_point).

ggsave("Salmonella-AllSamples.TIFF", plot = salmonella_all_samples_plot, width = 9, height = 3, units = "in", dpi = 600)
## Warning: Removed 100 rows containing missing values (geom_point).
# Inoculated milk only: raw copy numbers per kit, open symbols per spike set,
# dodged (no jitter) so the spike sets sit side by side within each kit.
# FALSE is spelled out because F is an ordinary, reassignable variable.
ggplot(
  Salmonella.InnOnly,
  aes(x = VariableKit, y = LogCopiespermLofMilk, shape = factor(SpikeSet))
) +
  scale_shape_discrete(solid = FALSE) +
  geom_point(
    aes(colour = VariableKit),
    size = 2, stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1)
  ) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Salmonella DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  # Fixed kit order on the x axis
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))

# Plot overlaying model estimates to raw data

# Raw observations: keep only the columns the plots map to aesthetics.
# Columns are selected by name rather than by position (previously
# c(25, 42, 4)) so the subset does not silently change if the input file
# gains, loses or reorders columns.
# NOTE(review): assumes positions 25, 42 and 4 were exactly these three
# columns - they are the only ones the plots below reference; confirm.
mod_df1_Salmonella.rawdata <- Salmonella.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]

# Model-based kit means of the selected model with their confidence limits
mod_df2_Salmonella.best.model <- emmeans(mod.Salmonella.best, ~VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()

# Jittered raw data with the model means, confidence limits and letter groups.
# The plot is stored in a variable so ggsave() below receives it explicitly
# instead of depending on whichever plot happened to be displayed last.
# FALSE is spelled out because F is an ordinary, reassignable variable.
salmonella_model_jitter_plot <- ggplot() +
  geom_jitter(
    data = mod_df1_Salmonella.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2, stroke = 1, width = .2
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Salmonella.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Salmonella.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  # Letters are drawn 2 units above each model mean
  geom_text(
    data = data.frame(mod.Salmonella.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 2, nudge_x = -0.05, fontface = "bold"
  ) +
  #ylim(3.5, 6.5)+
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Salmonella DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  # Fixed kit order on the x axis
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
salmonella_model_jitter_plot

ggsave("Salmonella-Model-Jitter.TIFF", plot = salmonella_model_jitter_plot, width = 7.5, height = 3.5, units = "in", dpi = 600)


# Same figure with the raw points dodged by spike set instead of jittered.
# The plot is stored in a variable so ggsave() below receives it explicitly
# instead of depending on whichever plot happened to be displayed last.
# FALSE is spelled out because F is an ordinary, reassignable variable.
salmonella_model_plot <- ggplot() +
  geom_point(
    data = mod_df1_Salmonella.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2, stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = .5)
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Salmonella.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Salmonella.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  # Letters are drawn 2 units above each model mean
  geom_text(
    data = data.frame(mod.Salmonella.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 2, nudge_x = -0.05, fontface = "bold"
  ) +
  #ylim(3.5, 6.5)+
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Salmonella DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  # Fixed kit order on the x axis
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
salmonella_model_plot

ggsave("Salmonella-Model.TIFF", plot = salmonella_model_plot, width = 7.5, height = 3.5, units = "in", dpi = 600)
# Record the R version and package versions used for this analysis
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] lmerTest_3.1-3     lme4_1.1-25        Matrix_1.2-18      nlme_3.1-150      
## [5] multcompView_0.1-8 emmeans_1.5.2-1    dplyr_1.0.2        ggplot2_3.3.2     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.5          nloptr_1.2.2.2      plyr_1.8.6         
##  [4] pillar_1.4.7        compiler_4.0.3      tools_4.0.3        
##  [7] boot_1.3-25         statmod_1.4.35      digest_0.6.27      
## [10] lattice_0.20-41     evaluate_0.14       lifecycle_0.2.0    
## [13] tibble_3.0.4        gtable_0.3.0        pkgconfig_2.0.3    
## [16] rlang_0.4.8         cli_2.2.0           yaml_2.2.1         
## [19] mvtnorm_1.1-1       xfun_0.19           withr_2.3.0        
## [22] stringr_1.4.0       knitr_1.30          generics_0.1.0     
## [25] vctrs_0.3.5         grid_4.0.3          tidyselect_1.1.0   
## [28] glue_1.4.2          R6_2.5.0            fansi_0.4.1        
## [31] rmarkdown_2.5       minqa_1.2.4         purrr_0.3.4        
## [34] farver_2.0.3        magrittr_2.0.1      MASS_7.3-53        
## [37] splines_4.0.3       scales_1.1.1        ellipsis_0.3.1     
## [40] htmltools_0.5.0     assertthat_0.2.1    colorspace_2.0-0   
## [43] xtable_1.8-4        numDeriv_2016.8-1.1 labeling_0.4.2     
## [46] utf8_1.1.4          stringi_1.5.3       estimability_1.3   
## [49] munsell_0.5.0       crayon_1.3.4